From 7c63431cc22c68742a6a42d3304fdb68431247c3 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Thu, 7 Mar 2024 08:37:47 +0100 Subject: [PATCH 001/158] [mlir][EmitC] Introduce a `CExpression` trait (#84177) This adds a `CExpression` trait and replaces the `isCExpression()` function. --- mlir/include/mlir/Dialect/EmitC/IR/EmitC.h | 1 + mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 39 +++++++++---------- .../mlir/Dialect/EmitC/IR/EmitCTraits.h | 30 ++++++++++++++ mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 3 +- .../EmitC/Transforms/FormExpressions.cpp | 2 +- .../Dialect/EmitC/Transforms/Transforms.cpp | 3 +- 6 files changed, 54 insertions(+), 24 deletions(-) create mode 100644 mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h index 3d38744527d599..1f0df3cb336b12 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h @@ -14,6 +14,7 @@ #define MLIR_DIALECT_EMITC_IR_EMITC_H #include "mlir/Bytecode/BytecodeOpInterface.h" +#include "mlir/Dialect/EmitC/IR/EmitCTraits.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 6bef395e94eb9d..02ab73fa2ca56b 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -47,11 +47,14 @@ class EmitC_BinaryOp traits = []> : let assemblyFormat = "operands attr-dict `:` functional-type(operands, results)"; } +// EmitC OpTrait +def CExpression : NativeOpTrait<"emitc::CExpression">; + // Types only used in binary arithmetic operations. 
def IntegerIndexOrOpaqueType : AnyTypeOf<[AnyInteger, Index, EmitC_OpaqueType]>; def FloatIntegerIndexOrOpaqueType : AnyTypeOf<[AnyFloat, IntegerIndexOrOpaqueType]>; -def EmitC_AddOp : EmitC_BinaryOp<"add", []> { +def EmitC_AddOp : EmitC_BinaryOp<"add", [CExpression]> { let summary = "Addition operation"; let description = [{ With the `add` operation the arithmetic operator + (addition) can @@ -74,7 +77,7 @@ def EmitC_AddOp : EmitC_BinaryOp<"add", []> { let hasVerifier = 1; } -def EmitC_ApplyOp : EmitC_Op<"apply", []> { +def EmitC_ApplyOp : EmitC_Op<"apply", [CExpression]> { let summary = "Apply operation"; let description = [{ With the `apply` operation the operators & (address of) and * (contents of) @@ -211,7 +214,7 @@ def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", []> { }]; } -def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", []> { +def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", [CExpression]> { let summary = "Opaque call operation"; let description = [{ The `call_opaque` operation represents a C++ function call. 
The callee @@ -257,10 +260,10 @@ def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", []> { let hasVerifier = 1; } -def EmitC_CastOp : EmitC_Op<"cast", [ - DeclareOpInterfaceMethods, - SameOperandsAndResultShape - ]> { +def EmitC_CastOp : EmitC_Op<"cast", + [CExpression, + DeclareOpInterfaceMethods, + SameOperandsAndResultShape]> { let summary = "Cast operation"; let description = [{ The `cast` operation performs an explicit type conversion and is emitted @@ -284,7 +287,7 @@ def EmitC_CastOp : EmitC_Op<"cast", [ let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; } -def EmitC_CmpOp : EmitC_BinaryOp<"cmp", []> { +def EmitC_CmpOp : EmitC_BinaryOp<"cmp", [CExpression]> { let summary = "Comparison operation"; let description = [{ With the `cmp` operation the comparison operators ==, !=, <, <=, >, >=, <=> @@ -355,7 +358,7 @@ def EmitC_ConstantOp : EmitC_Op<"constant", [ConstantLike]> { let hasVerifier = 1; } -def EmitC_DivOp : EmitC_BinaryOp<"div", []> { +def EmitC_DivOp : EmitC_BinaryOp<"div", [CExpression]> { let summary = "Division operation"; let description = [{ With the `div` operation the arithmetic operator / (division) can @@ -409,9 +412,8 @@ def EmitC_ExpressionOp : EmitC_Op<"expression", int32_t v7 = foo(v1 + v2) * (v3 + v4); ``` - The operations allowed within expression body are `emitc.add`, - `emitc.apply`, `emitc.call_opaque`, `emitc.cast`, `emitc.cmp`, `emitc.div`, - `emitc.mul`, `emitc.rem`, and `emitc.sub`. + The operations allowed within expression body are EmitC operations with the + CExpression trait. When specified, the optional `do_not_inline` indicates that the expression is to be emitted as seen above, i.e. as the rhs of an EmitC SSA value @@ -427,14 +429,9 @@ def EmitC_ExpressionOp : EmitC_Op<"expression", let assemblyFormat = "attr-dict (`noinline` $do_not_inline^)? 
`:` type($result) $region"; let extraClassDeclaration = [{ - static bool isCExpression(Operation &op) { - return isa(op); - } bool hasSideEffects() { auto predicate = [](Operation &op) { - assert(isCExpression(op) && "Expected a C expression"); + assert(op.hasTrait() && "Expected a C expression"); // Conservatively assume calls to read and write memory. if (isa(op)) return true; @@ -837,7 +834,7 @@ def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", []> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_MulOp : EmitC_BinaryOp<"mul", []> { +def EmitC_MulOp : EmitC_BinaryOp<"mul", [CExpression]> { let summary = "Multiplication operation"; let description = [{ With the `mul` operation the arithmetic operator * (multiplication) can @@ -861,7 +858,7 @@ def EmitC_MulOp : EmitC_BinaryOp<"mul", []> { let results = (outs FloatIntegerIndexOrOpaqueType); } -def EmitC_RemOp : EmitC_BinaryOp<"rem", []> { +def EmitC_RemOp : EmitC_BinaryOp<"rem", [CExpression]> { let summary = "Remainder operation"; let description = [{ With the `rem` operation the arithmetic operator % (remainder) can @@ -883,7 +880,7 @@ def EmitC_RemOp : EmitC_BinaryOp<"rem", []> { let results = (outs IntegerIndexOrOpaqueType); } -def EmitC_SubOp : EmitC_BinaryOp<"sub", []> { +def EmitC_SubOp : EmitC_BinaryOp<"sub", [CExpression]> { let summary = "Subtraction operation"; let description = [{ With the `sub` operation the arithmetic operator - (subtraction) can diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h new file mode 100644 index 00000000000000..c1602dfce4b484 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h @@ -0,0 +1,30 @@ +//===- EmitCTraits.h - EmitC trait definitions ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares C++ classes for some of the traits used in the EmitC +// dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H +#define MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H + +#include "mlir/IR/OpDefinition.h" + +namespace mlir { +namespace OpTrait { +namespace emitc { + +template +class CExpression : public TraitBase {}; + +} // namespace emitc +} // namespace OpTrait +} // namespace mlir + +#endif // MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index 4df8149b94c95f..07ee1d394287b9 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Dialect/EmitC/IR/EmitCTraits.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectImplementation.h" @@ -244,7 +245,7 @@ LogicalResult ExpressionOp::verify() { return emitOpError("requires yielded type to match return type"); for (Operation &op : region.front().without_terminator()) { - if (!isCExpression(op)) + if (!op.hasTrait()) return emitOpError("contains an unsupported operation"); if (op.getNumResults() != 1) return emitOpError("requires exactly one result for each operation"); diff --git a/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp b/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp index 21212155ffb22f..5b03f81b305fd5 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp +++ b/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp @@ -36,7 +36,7 @@ struct FormExpressionsPass // Wrap each C operator op with an expression op. 
OpBuilder builder(context); auto matchFun = [&](Operation *op) { - if (emitc::ExpressionOp::isCExpression(*op)) + if (op->hasTrait()) createExpression(op, builder); }; rootOp->walk(matchFun); diff --git a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp index 88b691b50f325d..87350ecdceaaac 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp @@ -16,7 +16,8 @@ namespace mlir { namespace emitc { ExpressionOp createExpression(Operation *op, OpBuilder &builder) { - assert(ExpressionOp::isCExpression(*op) && "Expected a C expression"); + assert(op->hasTrait() && + "Expected a C expression"); // Create an expression yielding the value returned by op. assert(op->getNumResults() == 1 && "Expected exactly one result"); From 7524ad9aa7b1b5003fe554a6ac8e434d50027dfb Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 7 Mar 2024 00:10:14 -0800 Subject: [PATCH 002/158] [AArch64][GlobalISel] Fix incorrect selection of monotonic s32->s64 anyext load. This load isn't selected by tablegen due to the anyext, but wasn't generating a subreg_to_reg. Maybe it shouldn't be formed at all during the combiner but to stop crashes later in codegen select it manually for now. 
--- .../GISel/AArch64InstructionSelector.cpp | 9 ++--- .../GlobalISel/select-atomic-load-store.mir | 33 ++++++++++++++++--- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 6652883792391b..48b73dced09ba0 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2997,13 +2997,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } } - if (IsZExtLoad) { - // The zextload from a smaller type to i32 should be handled by the + if (IsZExtLoad || + (isa(LdSt) && ValTy == LLT::scalar(64) && MemSizeInBits == 32)) { + // The any/zextload from a smaller type to i32 should be handled by the // importer. if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; - // If we have a ZEXTLOAD then change the load's type to be a narrower reg - // and zero_extend with SUBREG_TO_REG. + // If we have an extending load then change the load's type to be a + // narrower reg and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); Register DstReg = LoadStore->getOperand(0).getReg(); LoadStore->getOperand(0).setReg(LdReg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir index 5787f914b965d3..6b4bbb85b2ec44 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir @@ -9,6 +9,11 @@ ret i8 %v } + define i32 @anyext_load_monotonic_i32() { + %v = load atomic i32, ptr null monotonic, align 4 + ret i32 %v + } + ... 
--- name: load_acq_i8 @@ -25,13 +30,33 @@ body: | ; CHECK-LABEL: name: load_acq_i8 ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8) from %ir.ptr, align 8) - ; CHECK: $w0 = COPY [[LDARB]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8) from %ir.ptr, align 8) + ; CHECK-NEXT: $w0 = COPY [[LDARB]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 %2:gpr(s32) = G_LOAD %0(p0) :: (load acquire (s8) from %ir.ptr, align 8) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 ... +--- +name: anyext_load_monotonic_i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: anyext_load_monotonic_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $xzr + ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load monotonic (s32) from `ptr null`) + ; CHECK-NEXT: %ld:gpr64all = SUBREG_TO_REG 0, [[LDRWui]], %subreg.sub_32 + ; CHECK-NEXT: $x0 = COPY %ld + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %1:gpr(p0) = G_CONSTANT i64 0 + %ld:gpr(s64) = G_LOAD %1(p0) :: (load monotonic (s32) from `ptr null`) + $x0 = COPY %ld(s64) + RET_ReallyLR implicit $x0 + +... From 7a0e222a17058a311b69153d0b6f1b4459414778 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Mar 2024 08:16:52 +0000 Subject: [PATCH 003/158] Revert "Convert many LivePhysRegs uses to LiveRegUnits (#83905)" This reverts commit 2a13422b8bcee449405e3ebff957b4020805f91c. It was causing test failures on the expensive check builders. 
--- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 24 +++++----- .../Target/AArch64/AArch64FrameLowering.cpp | 28 ++++++----- .../Target/AMDGPU/SIOptimizeExecMasking.cpp | 8 ++-- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6 +-- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 6 +-- .../lib/Target/SystemZ/SystemZElimCompare.cpp | 6 +-- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 6 +-- .../lib/Target/SystemZ/SystemZShortenInst.cpp | 8 ++-- llvm/lib/Target/X86/X86FloatingPoint.cpp | 8 ++-- .../CodeGen/AArch64/arm64-shrink-wrapping.ll | 48 ++++++++++--------- .../AArch64/stack-probing-no-scratch-reg.mir | 38 +++++++-------- llvm/test/CodeGen/Thumb/PR35481.ll | 14 +++--- 12 files changed, 104 insertions(+), 96 deletions(-) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 07fa92889d8853..61a668907be77d 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ReachingDefAnalysis.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" @@ -421,9 +421,9 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, return; VisitedBBs.insert(MBB); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return; if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -469,11 +469,11 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, MCRegister 
PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); // Yes if the register is live out of the basic block. - if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return true; // Walk backwards through the block to see if the register is live at some @@ -481,7 +481,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, for (MachineInstr &Last : instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { LiveRegs.stepBackward(Last); - if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; @@ -504,9 +504,9 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return false; auto Last = MBB->getLastNonDebugInstr(); @@ -525,9 +525,9 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MachineInstr * ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const { - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return nullptr; auto Last = MBB->getLastNonDebugInstr(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 11cf42bbc80e85..5cc612e89162af 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -197,7 +197,6 @@ #include "llvm/ADT/SmallVector.h" 
#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -989,7 +988,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, } } -static void getLiveRegsForEntryMBB(LiveRegUnits &LiveRegs, +static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { const MachineFunction *MF = MBB.getParent(); LiveRegs.addLiveIns(MBB); @@ -1019,15 +1018,16 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { const AArch64Subtarget &Subtarget = MF->getSubtarget(); const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); - LiveRegUnits LiveRegs(TRI); + LivePhysRegs LiveRegs(TRI); getLiveRegsForEntryMBB(LiveRegs, *MBB); // Prefer X9 since it was historically used for the prologue scratch reg. - if (LiveRegs.available(AArch64::X9)) + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (LiveRegs.available(MRI, AArch64::X9)) return AArch64::X9; - for (Register Reg : AArch64::GPR64RegClass) { - if (LiveRegs.available(Reg)) + for (unsigned Reg : AArch64::GPR64RegClass) { + if (LiveRegs.available(MRI, Reg)) return Reg; } return AArch64::NoRegister; @@ -1044,11 +1044,13 @@ bool AArch64FrameLowering::canUseAsPrologue( if (AFI->hasSwiftAsyncContext()) { const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); - LiveRegUnits LiveRegs(TRI); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + LivePhysRegs LiveRegs(TRI); getLiveRegsForEntryMBB(LiveRegs, MBB); // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are // available. 
- if (!LiveRegs.available(AArch64::X16) || !LiveRegs.available(AArch64::X17)) + if (!LiveRegs.available(MRI, AArch64::X16) || + !LiveRegs.available(MRI, AArch64::X17)) return false; } @@ -1601,7 +1603,7 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, /// Collect live registers from the end of \p MI's parent up to (including) \p /// MI in \p LiveRegs. static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, - LiveRegUnits &LiveRegs) { + LivePhysRegs &LiveRegs) { MachineBasicBlock &MBB = *MI.getParent(); LiveRegs.addLiveOuts(MBB); @@ -1639,7 +1641,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, NonFrameStart->getFlag(MachineInstr::FrameSetup)) ++NonFrameStart; - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); if (NonFrameStart != MBB.end()) { getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs); // Ignore registers used for stack management for now. @@ -1657,7 +1659,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, make_range(MBB.instr_begin(), NonFrameStart->getIterator())) { for (auto &Op : MI.operands()) if (Op.isReg() && Op.isDef()) - assert(LiveRegs.available(Op.getReg()) && + assert(!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"); } }); @@ -4012,7 +4014,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, // FIXME : This approach of bailing out from merge is conservative in // some ways like even if stg loops are not present after merge the // insert list, this liveness check is done (which is not needed). 
- LiveRegUnits LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo())); + LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo())); LiveRegs.addLiveOuts(*MBB); for (auto I = MBB->rbegin();; ++I) { MachineInstr &MI = *I; @@ -4021,7 +4023,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, LiveRegs.stepBackward(*I); } InsertI++; - if (!LiveRegs.available(AArch64::NZCV)) + if (LiveRegs.contains(AArch64::NZCV)) return InsertI; llvm::stable_sort(Instrs, diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index d510e729512571..e3f54d01eb22a2 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -11,7 +11,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -313,7 +313,7 @@ MachineBasicBlock::reverse_iterator SIOptimizeExecMasking::findExecCopy( return E; } -// XXX - Seems LiveRegUnits doesn't work correctly since it will incorrectly +// XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly // report the register as unavailable because a super-register with a lane mask // is unavailable. 
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { @@ -383,7 +383,7 @@ bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, MCRegister Reg, bool UseLiveOuts, bool IgnoreStart) const { - LiveRegUnits LR(*TRI); + LivePhysRegs LR(*TRI); if (UseLiveOuts) LR.addLiveOuts(*Stop.getParent()); @@ -396,7 +396,7 @@ bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, LR.stepBackward(*A); } - return !LR.available(Reg); + return !LR.available(*MRI, Reg); } // Determine if a register Reg is not re-defined and still in use diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 9bcf0007974485..6121055eb02176 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,7 +31,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -109,7 +109,7 @@ namespace { const ARMSubtarget *STI; const TargetLowering *TL; ARMFunctionInfo *AFI; - LiveRegUnits LiveRegs; + LivePhysRegs LiveRegs; RegisterClassInfo RegClassInfo; MachineBasicBlock::const_iterator LiveRegPos; bool LiveRegsValid; @@ -589,7 +589,7 @@ unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { } for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) - if (LiveRegs.available(Reg)) + if (LiveRegs.available(MF->getRegInfo(), Reg)) return Reg; return 0; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index a8cf036f363cdd..0f4ece64bff532 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -612,11 +612,11 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const 
MachineFunction &MF) const { static void findTemporariesForLR(const BitVector &GPRsNoLRSP, const BitVector &PopFriendly, - const LiveRegUnits &UsedRegs, unsigned &PopReg, + const LivePhysRegs &UsedRegs, unsigned &PopReg, unsigned &TmpReg, MachineRegisterInfo &MRI) { PopReg = TmpReg = 0; for (auto Reg : GPRsNoLRSP.set_bits()) { - if (UsedRegs.available(Reg)) { + if (UsedRegs.available(MRI, Reg)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Reg)) { PopReg = Reg; @@ -684,7 +684,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - LiveRegUnits UsedRegs(TRI); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index e58f50e471fc0e..7423ed429ffb68 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -690,9 +690,9 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. 
- LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(MBB); - bool CompleteCCUsers = LiveRegs.available(SystemZ::CC); + bool CompleteCCUsers = !LiveRegs.contains(SystemZ::CC); SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 53e9bf9a9d1bb0..046a12208467b4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -1874,9 +1874,9 @@ prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const { } } if (CCLive) { - LiveRegUnits LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); + LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); LiveRegs.addLiveOuts(*MBB); - if (!LiveRegs.available(SystemZ::CC)) + if (LiveRegs.contains(SystemZ::CC)) return false; } diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index c0adfdbf120bdf..30b22fa1ce92de 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -46,7 +46,7 @@ class SystemZShortenInst : public MachineFunctionPass { const SystemZInstrInfo *TII; const 
TargetRegisterInfo *TRI; - LiveRegUnits LiveRegs; + LivePhysRegs LiveRegs; }; char SystemZShortenInst::ID = 0; @@ -88,7 +88,7 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); - if (!LiveRegs.available(OtherReg)) + if (LiveRegs.contains(OtherReg)) return false; uint64_t Imm = MI.getOperand(1).getImm(); @@ -143,7 +143,7 @@ bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { // Calls shortenOn001 if CCLive is false. CC def operand is added in // case of success. bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) { - if (LiveRegs.available(SystemZ::CC) && shortenOn001(MI, Opcode)) { + if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { MachineInstrBuilder(*MI.getParent()->getParent(), &MI) .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); return true; diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 260879ffaa4f12..ca4d03913d093e 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -30,7 +30,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/EdgeBundles.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -1751,7 +1751,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { void FPS::setKillFlags(MachineBasicBlock &MBB) const { const TargetRegisterInfo &TRI = *MBB.getParent()->getSubtarget().getRegisterInfo(); - LiveRegUnits LPR(TRI); + LivePhysRegs LPR(TRI); LPR.addLiveOuts(MBB); @@ -1773,14 +1773,14 @@ void FPS::setKillFlags(MachineBasicBlock &MBB) const { if (MO.isDef()) { 
Defs.set(Reg); - if (LPR.available(MO.getReg())) + if (!LPR.contains(MO.getReg())) MO.setIsDead(); } else Uses.push_back(&MO); } for (auto *MO : Uses) - if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg())) + if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg())) MO->setIsKill(); LPR.stepBackward(MI); diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index a5fcbf764b64fb..5806bcf0dacf16 100644 --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1028,22 +1028,14 @@ false: ret i32 %tmp.0 } -; Re-aligned stack pointer with all caller-save regs live. +; Re-aligned stack pointer with all caller-save regs live. See bug +; 26642. In this case we currently avoid shrink wrapping because +; ensuring we have a scratch register to re-align the stack pointer is +; too complicated. Output should be the same for both enabled and +; disabled shrink wrapping. define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5, ptr %ptr6) { ; ENABLE-LABEL: stack_realign2: ; ENABLE: ; %bb.0: -; ENABLE-NEXT: lsl w8, w1, w0 -; ENABLE-NEXT: lsr w9, w0, w1 -; ENABLE-NEXT: lsl w14, w0, w1 -; ENABLE-NEXT: lsr w11, w1, w0 -; ENABLE-NEXT: add w15, w1, w0 -; ENABLE-NEXT: sub w10, w8, w9 -; ENABLE-NEXT: subs w17, w1, w0 -; ENABLE-NEXT: add w16, w14, w8 -; ENABLE-NEXT: add w12, w9, w11 -; ENABLE-NEXT: add w13, w11, w15 -; ENABLE-NEXT: b.le LBB14_2 -; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: stp x28, x27, [sp, #-96]! 
; 16-byte Folded Spill ; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill ; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill @@ -1051,8 +1043,8 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill ; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill ; ENABLE-NEXT: add x29, sp, #80 -; ENABLE-NEXT: sub x18, sp, #32 -; ENABLE-NEXT: and sp, x18, #0xffffffffffffffe0 +; ENABLE-NEXT: sub x9, sp, #32 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 ; ENABLE-NEXT: .cfi_def_cfa w29, 16 ; ENABLE-NEXT: .cfi_offset w30, -8 ; ENABLE-NEXT: .cfi_offset w29, -16 @@ -1066,17 +1058,22 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; ENABLE-NEXT: .cfi_offset w26, -80 ; ENABLE-NEXT: .cfi_offset w27, -88 ; ENABLE-NEXT: .cfi_offset w28, -96 +; ENABLE-NEXT: lsl w8, w1, w0 +; ENABLE-NEXT: lsr w9, w0, w1 +; ENABLE-NEXT: lsl w14, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w15, w1, w0 +; ENABLE-NEXT: sub w10, w8, w9 +; ENABLE-NEXT: subs w17, w1, w0 +; ENABLE-NEXT: add w16, w14, w8 +; ENABLE-NEXT: add w12, w9, w11 +; ENABLE-NEXT: add w13, w11, w15 +; ENABLE-NEXT: b.le LBB14_2 +; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] ; ENABLE-NEXT: ; InlineAsm Start ; ENABLE-NEXT: nop ; ENABLE-NEXT: ; InlineAsm End -; ENABLE-NEXT: sub sp, x29, #80 -; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload ; ENABLE-NEXT: LBB14_2: ; %false ; ENABLE-NEXT: str w14, [x2] ; ENABLE-NEXT: str w8, [x3] @@ -1087,6 +1084,13 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; 
ENABLE-NEXT: stp w0, w1, [x2, #4] ; ENABLE-NEXT: stp w16, w10, [x2, #12] ; ENABLE-NEXT: stp w12, w13, [x2, #20] +; ENABLE-NEXT: sub sp, x29, #80 +; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: stack_realign2: diff --git a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir index 078d8a5bf6b66e..f50bd9ab4b8a1b 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir +++ b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir @@ -43,43 +43,43 @@ machineFunctionInfo: {} body: | ; CHECK-LABEL: name: f ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $w0, $lr - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x9 = IMPLICIT_DEF - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.if.then1: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28, $lr + ; CHECK-NEXT: liveins: $w0, $lr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) - ; CHECK-NEXT: $xzr = frame-setup SUBXri $sp, 36, 12 + ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 36, 12 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.if.then1: + ; CHECK-NEXT: bb.3.entry: ; CHECK-NEXT: 
successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12 ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 - ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $xzr, 24, implicit-def $nzcv + ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv ; CHECK-NEXT: frame-setup Bcc 1, %bb.3, implicit $nzcv ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.if.then1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2544, 0 ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: $x9 = IMPLICIT_DEF + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ADDXri $sp, 0, 0 ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.exit: ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 36, 12 ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2544, 0 ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) - ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: bb.2.exit: - ; CHECK-NEXT: liveins: $lr - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET_ReallyLR bb.0.entry: successors: %bb.1(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/Thumb/PR35481.ll b/llvm/test/CodeGen/Thumb/PR35481.ll index e48d1547782caf..ad3215ecb94952 100644 --- a/llvm/test/CodeGen/Thumb/PR35481.ll +++ b/llvm/test/CodeGen/Thumb/PR35481.ll @@ -18,10 +18,11 @@ define <4 x i32> @f() local_unnamed_addr #0 { ; CHECK-V4T-NEXT: movs r2, #3 ; CHECK-V4T-NEXT: movs r3, #4 ; CHECK-V4T-NEXT: bl g -; CHECK-V4T-NEXT: ldr r7, [sp, #4] -; CHECK-V4T-NEXT: mov lr, r7 ; CHECK-V4T-NEXT: pop {r7} -; CHECK-V4T-NEXT: add sp, #4 +; CHECK-V4T-NEXT: mov r12, r0 +; CHECK-V4T-NEXT: pop {r0} +; CHECK-V4T-NEXT: mov lr, r0 +; CHECK-V4T-NEXT: mov r0, r12 ; CHECK-V4T-NEXT: bx lr ; ; CHECK-V8M-LABEL: f: @@ -35,10 +36,11 @@ define <4 x i32> @f() local_unnamed_addr #0 { ; CHECK-V8M-NEXT: movs r1, #2 ; CHECK-V8M-NEXT: movs r2, #3 ; CHECK-V8M-NEXT: movs r3, #4 -; CHECK-V8M-NEXT: ldr r7, [sp, #4] -; CHECK-V8M-NEXT: mov lr, r7 ; CHECK-V8M-NEXT: pop {r7} -; CHECK-V8M-NEXT: add sp, #4 +; CHECK-V8M-NEXT: mov r12, r0 +; CHECK-V8M-NEXT: pop {r0} +; CHECK-V8M-NEXT: mov lr, r0 +; CHECK-V8M-NEXT: mov r0, r12 ; CHECK-V8M-NEXT: b g entry: %call = tail call i32 @h(i32 1) From 91808c845fd6f0624525b6d6348b2c284628ce58 Mon Sep 17 00:00:00 2001 From: Michal Paszkowski Date: Thu, 7 Mar 2024 00:22:13 -0800 Subject: [PATCH 004/158] [docs] Change SPIR-V backend meeting day (#84286) Changing SPIR-V backend meeting day and removing my office hours --- llvm/docs/GettingInvolved.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index f89483904ab737..763aeb87c68805 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -215,7 +215,7 @@ what to add to your calendar invite. 
- `gcal `__ - `Minutes/docs `__ * - LLVM SPIR-V Backend Working Group - - Every week on Thursday + - Every week on Monday - - `Meeting details/agenda `__ * - SYCL Upstream Working Group @@ -305,11 +305,6 @@ The :doc:`CodeOfConduct` applies to all office hours. - Monthly, 2nd Wednesday of the month at 11:00am PT, for 30 minutes. - `Zoom `__ - English, Russian - * - Michal Paszkowski - - SPIR-V backend, IGC, OpenCL, and IR transformations - - Monthly, 3rd Thursday of the month at 21:00 Warsaw/Poland time, 1 hour slot. - - `MS Teams `__ - - English, Polish * - Quentin Colombet (he/him) - LLVM/MLIR; Codegen (Instruction selection (GlobalISel/SDISel), Machine IR, Register allocation, etc.); Optimizations; MCA From 99500e8c08a4d941acb8a7eb00523296fb2acf7a Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Thu, 7 Mar 2024 11:36:50 +0300 Subject: [PATCH 005/158] [Clang][C++23] Implement P2448R2: Relaxing some constexpr restrictions (#77753) Per https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2448r2.html function/constructor/destructor can be marked `constexpr` even though it never produces a constant expression. Non-literal types as return types and parameter types of functions marked `constexpr` are also allowed. Since this is not a DR, the diagnostic messages are still preserved for C++ standards older than C++23. 
--- clang/docs/ReleaseNotes.rst | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 26 +-- clang/lib/AST/DeclCXX.cpp | 13 +- clang/lib/Sema/SemaDeclCXX.cpp | 93 +++++----- clang/test/AST/Interp/cxx23.cpp | 59 ++----- .../class.compare.default/p3.cpp | 40 ++--- .../class.compare.default/p4.cpp | 20 +-- .../dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp | 8 +- .../dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp | 10 +- .../CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp | 18 +- .../CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp | 8 +- .../dcl.fct.def/dcl.fct.def.default/p2.cpp | 6 +- clang/test/CXX/drs/dr13xx.cpp | 22 +-- clang/test/CXX/drs/dr14xx.cpp | 6 +- clang/test/CXX/drs/dr15xx.cpp | 21 ++- clang/test/CXX/drs/dr16xx.cpp | 20 +-- clang/test/CXX/drs/dr6xx.cpp | 24 +-- clang/test/CXX/expr/expr.const/p5-26.cpp | 4 +- clang/test/CXX/special/class.copy/p13-0x.cpp | 2 +- .../SemaCXX/constant-expression-cxx11.cpp | 38 +++-- .../SemaCXX/constant-expression-cxx14.cpp | 33 ++-- .../SemaCXX/constant-expression-cxx2b.cpp | 24 +-- .../test/SemaCXX/cxx23-invalid-constexpr.cpp | 159 ++++++++++++++++++ clang/test/SemaCXX/cxx2a-consteval.cpp | 2 +- .../SemaCXX/deduced-return-type-cxx14.cpp | 8 +- .../addrspace-constructors.clcpp | 2 +- clang/www/cxx_status.html | 9 +- 27 files changed, 408 insertions(+), 269 deletions(-) create mode 100644 clang/test/SemaCXX/cxx23-invalid-constexpr.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8300a8484585ae..1b901a27fd19d1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -97,6 +97,8 @@ C++23 Feature Support - Implemented `P2718R0: Lifetime extension in range-based for loops `_. Also materialize temporary object which is a prvalue in discarded-value expression. +- Implemented `P2448R2: Relaxing some constexpr restrictions `_. 
+ C++2c Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5a90e631a894c9..c8dfdc08f5ea07 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9607,13 +9607,10 @@ def err_defaulted_copy_assign_not_ref : Error< "the parameter for an explicitly-defaulted copy assignment operator must be an " "lvalue reference type">; def err_incorrect_defaulted_constexpr : Error< - "defaulted definition of %sub{select_special_member_kind}0 " - "is not constexpr">; + "defaulted definition of %sub{select_special_member_kind}0 cannot be marked %select{constexpr|consteval}1 " + "before C++23">; def err_incorrect_defaulted_constexpr_with_vb: Error< "%sub{select_special_member_kind}0 cannot be 'constexpr' in a class with virtual base class">; -def err_incorrect_defaulted_consteval : Error< - "defaulted declaration of %sub{select_special_member_kind}0 " - "cannot be consteval because implicit definition is not constexpr">; def warn_defaulted_method_deleted : Warning< "explicitly defaulted %sub{select_special_member_kind}0 is implicitly " "deleted">, InGroup; @@ -9724,21 +9721,12 @@ def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note< "%select{|member|base class}0 %1 declared here">; def note_defaulted_comparison_cannot_deduce_callee : Note< "selected 'operator<=>' for %select{|member|base class}0 %1 declared here">; -def ext_defaulted_comparison_constexpr_mismatch : Extension< +def err_defaulted_comparison_constexpr_mismatch : Error< "defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|" - "three-way comparison operator}0 that is " - "declared %select{constexpr|consteval}2 but" - "%select{|for which the corresponding implicit 'operator==' }0 " - "invokes a non-constexpr comparison function is a C++23 extension">, - InGroup>; -def warn_cxx23_compat_defaulted_comparison_constexpr_mismatch 
: Warning< - "defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|" - "three-way comparison operator}0 that is " - "declared %select{constexpr|consteval}2 but" - "%select{|for which the corresponding implicit 'operator==' }0 " - "invokes a non-constexpr comparison function is incompatible with C++ " - "standards before C++23">, - InGroup, DefaultIgnore; + "three-way comparison operator}0 cannot be " + "declared %select{constexpr|consteval}2 because " + "%select{it|for which the corresponding implicit 'operator==' }0 " + "invokes a non-constexpr comparison function ">; def note_defaulted_comparison_not_constexpr : Note< "non-constexpr comparison function would be used to compare " "%select{|member %1|base class %1}0">; diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index b4f2327d9c560a..1c3dcf63465c68 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -400,10 +400,11 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, // C++11 [class.ctor]p6: // If that user-written default constructor would satisfy the - // requirements of a constexpr constructor, the implicitly-defined - // default constructor is constexpr. + // requirements of a constexpr constructor/function(C++23), the + // implicitly-defined default constructor is constexpr. 
if (!BaseClassDecl->hasConstexprDefaultConstructor()) - data().DefaultedDefaultConstructorIsConstexpr = false; + data().DefaultedDefaultConstructorIsConstexpr = + C.getLangOpts().CPlusPlus23; // C++1z [class.copy]p8: // The implicitly-declared copy constructor for a class X will have @@ -548,7 +549,8 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { // -- for every subobject of class type or (possibly multi-dimensional) // array thereof, that class type shall have a constexpr destructor if (!Subobj->hasConstexprDestructor()) - data().DefaultedDestructorIsConstexpr = false; + data().DefaultedDestructorIsConstexpr = + getASTContext().getLangOpts().CPlusPlus23; // C++20 [temp.param]p7: // A structural type is [...] a literal class type [for which] the types @@ -1297,7 +1299,8 @@ void CXXRecordDecl::addedMember(Decl *D) { !FieldRec->hasConstexprDefaultConstructor() && !isUnion()) // The standard requires any in-class initializer to be a constant // expression. We consider this to be a defect. 
- data().DefaultedDefaultConstructorIsConstexpr = false; + data().DefaultedDefaultConstructorIsConstexpr = + Context.getLangOpts().CPlusPlus23; // C++11 [class.copy]p8: // The implicitly-declared copy constructor for a class X will have diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 199f2523cfb5d2..e258a4f7c89415 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1715,6 +1715,8 @@ static bool CheckLiteralType(Sema &SemaRef, Sema::CheckConstexprKind Kind, static bool CheckConstexprDestructorSubobjects(Sema &SemaRef, const CXXDestructorDecl *DD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); auto Check = [&](SourceLocation Loc, QualType T, const FieldDecl *FD) { const CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); @@ -1746,6 +1748,8 @@ static bool CheckConstexprDestructorSubobjects(Sema &SemaRef, static bool CheckConstexprParameterTypes(Sema &SemaRef, const FunctionDecl *FD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); unsigned ArgIndex = 0; const auto *FT = FD->getType()->castAs(); for (FunctionProtoType::param_type_iterator i = FT->param_type_begin(), @@ -1767,6 +1771,8 @@ static bool CheckConstexprParameterTypes(Sema &SemaRef, /// true. If not, produce a suitable diagnostic and return false. 
static bool CheckConstexprReturnType(Sema &SemaRef, const FunctionDecl *FD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); if (CheckLiteralType(SemaRef, Kind, FD->getLocation(), FD->getReturnType(), diag::err_constexpr_non_literal_return, FD->isConsteval())) @@ -1856,16 +1862,18 @@ bool Sema::CheckConstexprFunctionDefinition(const FunctionDecl *NewFD, } } - // - its return type shall be a literal type; - if (!CheckConstexprReturnType(*this, NewFD, Kind)) + // - its return type shall be a literal type; (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !CheckConstexprReturnType(*this, NewFD, Kind)) return false; } if (auto *Dtor = dyn_cast(NewFD)) { // A destructor can be constexpr only if the defaulted destructor could be; // we don't need to check the members and bases if we already know they all - // have constexpr destructors. - if (!Dtor->getParent()->defaultedDestructorIsConstexpr()) { + // have constexpr destructors. (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !Dtor->getParent()->defaultedDestructorIsConstexpr()) { if (Kind == CheckConstexprKind::CheckValid) return false; if (!CheckConstexprDestructorSubobjects(*this, Dtor, Kind)) @@ -1873,8 +1881,9 @@ bool Sema::CheckConstexprFunctionDefinition(const FunctionDecl *NewFD, } } - // - each of its parameter types shall be a literal type; - if (!CheckConstexprParameterTypes(*this, NewFD, Kind)) + // - each of its parameter types shall be a literal type; (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !CheckConstexprParameterTypes(*this, NewFD, Kind)) return false; Stmt *Body = NewFD->getBody(); @@ -2457,7 +2466,8 @@ static bool CheckConstexprFunctionBody(Sema &SemaRef, const FunctionDecl *Dcl, // function", so is not checked in CheckValid mode. 
SmallVector Diags; if (Kind == Sema::CheckConstexprKind::Diagnose && - !Expr::isPotentialConstantExpr(Dcl, Diags)) { + !Expr::isPotentialConstantExpr(Dcl, Diags) && + !SemaRef.getLangOpts().CPlusPlus23) { SemaRef.Diag(Dcl->getLocation(), diag::ext_constexpr_function_never_constant_expr) << isa(Dcl) << Dcl->isConsteval() @@ -7535,21 +7545,23 @@ static bool defaultedSpecialMemberIsConstexpr( // C++1y [class.copy]p26: // -- [the class] is a literal type, and - if (!Ctor && !ClassDecl->isLiteral()) + if (!Ctor && !ClassDecl->isLiteral() && !S.getLangOpts().CPlusPlus23) return false; // -- every constructor involved in initializing [...] base class // sub-objects shall be a constexpr constructor; // -- the assignment operator selected to copy/move each direct base // class is a constexpr function, and - for (const auto &B : ClassDecl->bases()) { - const RecordType *BaseType = B.getType()->getAs(); - if (!BaseType) - continue; - CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); - if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg, - InheritedCtor, Inherited)) - return false; + if (!S.getLangOpts().CPlusPlus23) { + for (const auto &B : ClassDecl->bases()) { + const RecordType *BaseType = B.getType()->getAs(); + if (!BaseType) + continue; + CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); + if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg, + InheritedCtor, Inherited)) + return false; + } } // -- every constructor involved in initializing non-static data members @@ -7559,20 +7571,22 @@ static bool defaultedSpecialMemberIsConstexpr( // -- for each non-static data member of X that is of class type (or array // thereof), the assignment operator selected to copy/move that member is // a constexpr function - for (const auto *F : ClassDecl->fields()) { - if (F->isInvalidDecl()) - continue; - if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer()) - continue; - QualType BaseType = 
S.Context.getBaseElementType(F->getType()); - if (const RecordType *RecordTy = BaseType->getAs()) { - CXXRecordDecl *FieldRecDecl = cast(RecordTy->getDecl()); - if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM, - BaseType.getCVRQualifiers(), - ConstArg && !F->isMutable())) + if (!S.getLangOpts().CPlusPlus23) { + for (const auto *F : ClassDecl->fields()) { + if (F->isInvalidDecl()) + continue; + if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer()) + continue; + QualType BaseType = S.Context.getBaseElementType(F->getType()); + if (const RecordType *RecordTy = BaseType->getAs()) { + CXXRecordDecl *FieldRecDecl = cast(RecordTy->getDecl()); + if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM, + BaseType.getCVRQualifiers(), + ConstArg && !F->isMutable())) + return false; + } else if (CSM == Sema::CXXDefaultConstructor) { return false; - } else if (CSM == Sema::CXXDefaultConstructor) { - return false; + } } } @@ -7858,18 +7872,17 @@ bool Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD, MD->isConstexpr() && !Constexpr && MD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) { if (!MD->isConsteval() && RD->getNumVBases()) { - Diag(MD->getBeginLoc(), diag::err_incorrect_defaulted_constexpr_with_vb) + Diag(MD->getBeginLoc(), + diag::err_incorrect_defaulted_constexpr_with_vb) << CSM; for (const auto &I : RD->vbases()) Diag(I.getBeginLoc(), diag::note_constexpr_virtual_base_here); } else { - Diag(MD->getBeginLoc(), MD->isConsteval() - ? diag::err_incorrect_defaulted_consteval - : diag::err_incorrect_defaulted_constexpr) - << CSM; + Diag(MD->getBeginLoc(), diag::err_incorrect_defaulted_constexpr) + << CSM << MD->isConsteval(); } - // FIXME: Explain why the special member can't be constexpr. - HadError = true; + HadError = true; + // FIXME: Explain why the special member can't be constexpr. 
} if (First) { @@ -9101,13 +9114,11 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, // - if the function is a constructor or destructor, its class does not // have any virtual base classes. if (FD->isConstexpr()) { - if (CheckConstexprReturnType(*this, FD, CheckConstexprKind::Diagnose) && + if (!getLangOpts().CPlusPlus23 && + CheckConstexprReturnType(*this, FD, CheckConstexprKind::Diagnose) && CheckConstexprParameterTypes(*this, FD, CheckConstexprKind::Diagnose) && !Info.Constexpr) { - Diag(FD->getBeginLoc(), - getLangOpts().CPlusPlus23 - ? diag::warn_cxx23_compat_defaulted_comparison_constexpr_mismatch - : diag::ext_defaulted_comparison_constexpr_mismatch) + Diag(FD->getBeginLoc(), diag::err_defaulted_comparison_constexpr_mismatch) << FD->isImplicit() << (int)DCK << FD->isConsteval(); DefaultedComparisonAnalyzer(*this, RD, FD, DCK, DefaultedComparisonAnalyzer::ExplainConstexpr) diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp index f1df936a5abe74..127b58915127cf 100644 --- a/clang/test/AST/Interp/cxx23.cpp +++ b/clang/test/AST/Interp/cxx23.cpp @@ -1,82 +1,58 @@ -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all-20 %s // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23,all %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all-20 %s -fexperimental-new-constant-interpreter // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all %s -fexperimental-new-constant-interpreter /// FIXME: The new interpreter is missing all the 'control flows through...' diagnostics. 
constexpr int f(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a static variable}} \ // expected20-warning {{is a C++23 extension}} \ // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{initializer of 'm' is not a constant expression}} \ - // expected23-note {{initializer of 'm' is not a constant expression}} + return m; // expected20-note {{initializer of 'm' is not a constant expression}} } constexpr int g(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{initializer of 'm' is not a constant expression}} \ - // expected23-note {{initializer of 'm' is not a constant expression}} + // 
expected20-note {{declared here}} + return m; // expected20-note {{initializer of 'm' is not a constant expression}} } constexpr int c_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static _Thread_local int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{read of non-const variable}} \ - // expected23-note {{read of non-const variable}} + // expected20-note {{declared here}} + return m; // expected20-note {{read of non-const variable}} } constexpr int gnu_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static __thread int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note 
{{read of non-const variable}} \ - // expected23-note {{read of non-const variable}} + // expected20-note {{declared here}} + return m; // expected20-note {{read of non-const variable}} } -constexpr int h(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} +constexpr int h(int n) { // ref20-error {{constexpr function never produces a constant expression}} static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a static variable}} \ // expected20-warning {{is a C++23 extension}} return &m - &m; } -constexpr int i(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} +constexpr int i(int n) { // ref20-error {{constexpr function never produces a constant expression}} thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} return &m - &m; } @@ -132,8 +108,9 @@ namespace StaticOperators { static_assert(f2() == 3); struct S1 { - constexpr S1() { // all-error {{never produces a constant expression}} - throw; // all-note 2{{not valid in a constant expression}} + constexpr S1() { // all-20-error {{never produces a constant expression}} + throw; // all-note {{not valid in a constant expression}} \ + // all-20-note {{not valid in a constant expression}} } static constexpr int operator()() { return 3; } // ref20-warning {{C++23 extension}} \ // expected20-warning {{C++23 extension}} diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp 
b/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp index 166bd97e2731cb..c73eb0dee99515 100644 --- a/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp +++ b/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp @@ -1,8 +1,8 @@ // This test is for the [class.compare.default]p3 added by P2002R0 -// Also covers modifications made by P2448R2 and extension warnings +// Also covers modifications made by P2448R2 -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -Wc++23-default-comp-relaxed-constexpr -verify=expected,extension %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx2a %s +// RUN: %clang_cc1 -std=c++23 -verify=expected %s namespace std { struct strong_ordering { @@ -82,10 +82,12 @@ struct TestB { }; struct C { - friend bool operator==(const C&, const C&); // expected-note {{previous}} extension-note 2{{non-constexpr comparison function declared here}} + friend bool operator==(const C&, const C&); // expected-note {{previous}} \ + // cxx2a-note 2{{declared here}} friend bool operator!=(const C&, const C&) = default; // expected-note {{previous}} - friend std::strong_ordering operator<=>(const C&, const C&); // expected-note {{previous}} extension-note 2{{non-constexpr comparison function declared here}} + friend std::strong_ordering operator<=>(const C&, const C&); // expected-note {{previous}} \ + // cxx2a-note 2{{declared here}} friend bool operator<(const C&, const C&) = default; // expected-note {{previous}} friend bool operator<=(const C&, const C&) = default; // expected-note {{previous}} friend bool operator>(const C&, const C&) = default; // expected-note {{previous}} @@ -129,23 +131,23 @@ struct TestD { struct E { A a; - C c; // extension-note 2{{non-constexpr comparison function would be used to compare member 'c'}} + C c; // cxx2a-note 2{{non-constexpr comparison function would be used to compare member 'c'}} A b; - friend constexpr bool operator==(const E&, const E&) = default; // 
extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr bool operator==(const E&, const E&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator!=(const E&, const E&) = default; - friend constexpr std::strong_ordering operator<=>(const E&, const E&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr std::strong_ordering operator<=>(const E&, const E&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator<(const E&, const E&) = default; friend constexpr bool operator<=(const E&, const E&) = default; friend constexpr bool operator>(const E&, const E&) = default; friend constexpr bool operator>=(const E&, const E&) = default; }; -struct E2 : A, C { // extension-note 2{{non-constexpr comparison function would be used to compare base class 'C'}} - friend constexpr bool operator==(const E2&, const E2&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} +struct E2 : A, C { // cxx2a-note 2{{non-constexpr comparison function would be used to compare base class 'C'}} + friend constexpr bool operator==(const E2&, const E2&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator!=(const E2&, const E2&) = default; - friend constexpr std::strong_ordering operator<=>(const E2&, const E2&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr std::strong_ordering operator<=>(const E2&, const E2&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator<(const E2&, const E2&) = default; friend constexpr bool operator<=(const E2&, const E2&) = default; friend constexpr bool operator>(const E2&, const E2&) = default; @@ 
-153,14 +155,14 @@ struct E2 : A, C { // extension-note 2{{non-constexpr comparison function would }; struct F { - friend bool operator==(const F&, const F&); // extension-note {{non-constexpr comparison function declared here}} - friend constexpr bool operator!=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - - friend std::strong_ordering operator<=>(const F&, const F&); // extension-note 4{{non-constexpr comparison function declared here}} - friend constexpr bool operator<(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator<=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator>(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator>=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend bool operator==(const F&, const F&); // cxx2a-note {{declared here}} + friend constexpr bool operator!=(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + + friend std::strong_ordering operator<=>(const F&, const F&); // cxx2a-note 4{{non-constexpr comparison function declared here}} + friend constexpr bool operator<(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator<=(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator>(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator>=(const F&, const F&) = default; // 
cxx2a-error {{cannot be declared constexpr}} }; // No implicit 'constexpr' if it's not the first declaration. diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp index 02cdd7f85aebfa..534c3b34d8832a 100644 --- a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp +++ b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -Wc++23-default-comp-relaxed-constexpr -verify=expected,extension %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx2a %s +// RUN: %clang_cc1 -std=c++23 -verify=expected %s // This test is for [class.compare.default]p3 as modified and renumbered to p4 // by P2002R0. -// Also covers modifications made by P2448R2 and extension warnings +// Also covers modifications made by P2448R2 namespace std { struct strong_ordering { @@ -78,13 +78,13 @@ void use_g(G g) { } struct H { - bool operator==(const H&) const; // extension-note {{non-constexpr comparison function declared here}} + bool operator==(const H&) const; // cxx2a-note {{non-constexpr comparison function declared here}} constexpr std::strong_ordering operator<=>(const H&) const { return std::strong_ordering::equal; } }; struct I { - H h; // extension-note {{non-constexpr comparison function would be used to compare member 'h'}} - constexpr std::strong_ordering operator<=>(const I&) const = default; // extension-warning {{implicit 'operator==' invokes a non-constexpr comparison function is a C++23 extension}} + H h; // cxx2a-note {{non-constexpr comparison function would be used to compare member 'h'}} + constexpr std::strong_ordering operator<=>(const I&) const = default; // cxx2a-error {{cannot be declared constexpr}} }; struct J { @@ -148,16 +148,16 @@ namespace NoInjectionIfOperatorEqualsDeclared { namespace GH61238 { template struct my_struct { - A value; // extension-note {{non-constexpr 
comparison function would be used to compare member 'value'}} + A value; // cxx2a-note {{non-constexpr comparison function would be used to compare member 'value'}} - constexpr friend bool operator==(const my_struct &, const my_struct &) noexcept = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + constexpr friend bool operator==(const my_struct &, const my_struct &) noexcept = default; // cxx2a-error {{cannot be declared constexpr}} }; struct non_constexpr_type { - friend bool operator==(non_constexpr_type, non_constexpr_type) noexcept { // extension-note {{non-constexpr comparison function declared here}} + friend bool operator==(non_constexpr_type, non_constexpr_type) noexcept { // cxx2a-note {{non-constexpr comparison function declared here}} return false; } }; -my_struct obj; // extension-note {{in instantiation of template class 'GH61238::my_struct' requested here}} +my_struct obj; // cxx2a-note {{in instantiation of template class 'GH61238::my_struct' requested here}} } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp index 7ad2e582a81268..48bc8fb426bcb1 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp @@ -58,12 +58,12 @@ namespace subobject { struct A { ~A(); }; - struct B : A { // expected-note {{here}} - constexpr ~B() {} // expected-error {{destructor cannot be declared constexpr because base class 'A' does not have a constexpr destructor}} + struct B : A { // cxx2a-note {{here}} + constexpr ~B() {} // cxx2a-error {{destructor cannot be declared constexpr because base class 'A' does not have a constexpr destructor}} }; struct C { - A a; // expected-note {{here}} - constexpr ~C() {} // expected-error {{destructor cannot be declared constexpr because data member 'a' does not have a constexpr destructor}} + A a; // cxx2a-note 
{{here}} + constexpr ~C() {} // cxx2a-error {{destructor cannot be declared constexpr because data member 'a' does not have a constexpr destructor}} }; struct D : A { A a; diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp index c07502c0555b50..8cb37ae6d1cdec 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp @@ -14,9 +14,8 @@ constexpr int i(int n) { return m; } -constexpr int g() { // expected-error {{constexpr function never produces a constant expression}} - goto test; // expected-note {{subexpression not valid in a constant expression}} \ - // expected-warning {{use of this statement in a constexpr function is incompatible with C++ standards before C++23}} +constexpr int g() { + goto test; // expected-warning {{use of this statement in a constexpr function is incompatible with C++ standards before C++23}} test: return 0; } @@ -29,9 +28,8 @@ struct NonLiteral { // expected-note 2 {{'NonLiteral' is not literal}} NonLiteral() {} }; -constexpr void non_literal() { // expected-error {{constexpr function never produces a constant expression}} - NonLiteral n; // expected-note {{non-literal type 'NonLiteral' cannot be used in a constant expression}} \ - // expected-warning {{definition of a variable of non-literal type in a constexpr function is incompatible with C++ standards before C++23}} +constexpr void non_literal() { + NonLiteral n; // expected-warning {{definition of a variable of non-literal type in a constexpr function is incompatible with C++ standards before C++23}} } constexpr void non_literal2(bool b) { diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp index 6214ff8006d67f..4416c825226494 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp @@ -1,6 +1,6 @@ // 
RUN: %clang_cc1 -fcxx-exceptions -verify=expected,beforecxx14,beforecxx20,beforecxx23 -std=c++11 %s -// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,beforecxx20,beforecxx23 -std=c++14 %s -// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20,beforecxx23 -std=c++20 %s +// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,beforecxx20,beforecxx23,cxx14_20 -std=c++14 %s +// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20,beforecxx23,cxx14_20 -std=c++20 %s // RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20 -std=c++23 %s namespace N { @@ -11,7 +11,7 @@ namespace M { typedef double D; } -struct NonLiteral { // expected-note 2{{no constexpr constructors}} +struct NonLiteral { // beforecxx23-note 2{{no constexpr constructors}} NonLiteral() {} NonLiteral(int) {} }; @@ -43,7 +43,7 @@ struct T : SS, NonLiteral { // - its return type shall be a literal type; // Once we support P2448R2 constexpr functions will be allowd to return non-literal types // The destructor will also be allowed - constexpr NonLiteral NonLiteralReturn() const { return {}; } // expected-error {{constexpr function's return type 'NonLiteral' is not a literal type}} + constexpr NonLiteral NonLiteralReturn() const { return {}; } // beforecxx23-error {{constexpr function's return type 'NonLiteral' is not a literal type}} constexpr void VoidReturn() const { return; } // beforecxx14-error {{constexpr function's return type 'void' is not a literal type}} constexpr ~T(); // beforecxx20-error {{destructor cannot be declared constexpr}} @@ -52,7 +52,7 @@ struct T : SS, NonLiteral { // - each of its parameter types shall be a literal type; // Once we support P2448R2 constexpr functions will be allowd to have parameters of non-literal types - constexpr int NonLiteralParam(NonLiteral) const { return 0; } // expected-error {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} + constexpr int 
NonLiteralParam(NonLiteral) const { return 0; } // beforecxx23-error {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} typedef int G(NonLiteral) const; constexpr G NonLiteralParam2; // ok until definition @@ -66,7 +66,7 @@ struct T : SS, NonLiteral { // constexpr since they can't be const. constexpr T &operator=(const T &) = default; // beforecxx14-error {{an explicitly-defaulted copy assignment operator may not have 'const', 'constexpr' or 'volatile' qualifiers}} \ // beforecxx14-warning {{C++14}} \ - // aftercxx14-error{{defaulted definition of copy assignment operator is not constexpr}} + // cxx14_20-error{{defaulted definition of copy assignment operator cannot be marked constexpr}} }; constexpr int T::OutOfLineVirtual() const { return 0; } @@ -229,9 +229,9 @@ namespace DR1364 { return k; // ok, even though lvalue-to-rvalue conversion of a function // parameter is not allowed in a constant expression. } - int kGlobal; // expected-note {{here}} - constexpr int f() { // expected-error {{constexpr function never produces a constant expression}} - return kGlobal; // expected-note {{read of non-const}} + int kGlobal; // beforecxx23-note {{here}} + constexpr int f() { // beforecxx23-error {{constexpr function never produces a constant expression}} + return kGlobal; // beforecxx23-note {{read of non-const}} } } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp index f1f677ebfcd341..92698ec1c7387d 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp @@ -272,7 +272,7 @@ struct X { union XU1 { int a; constexpr XU1() = default; }; #ifndef CXX2A -// expected-error@-2{{not constexpr}} +// expected-error@-2{{cannot be marked constexpr}} #endif union XU2 { int a = 1; constexpr XU2() = default; }; @@ -282,7 +282,7 @@ struct XU3 { }; constexpr XU3() = default; #ifndef CXX2A - // expected-error@-2{{not 
constexpr}} + // expected-error@-2{{cannot be marked constexpr}} #endif }; struct XU4 { @@ -333,7 +333,7 @@ namespace CtorLookup { constexpr B(B&); }; constexpr B::B(const B&) = default; - constexpr B::B(B&) = default; // expected-error {{not constexpr}} + constexpr B::B(B&) = default; // expected-error {{cannot be marked constexpr}} struct C { A a; @@ -342,7 +342,7 @@ namespace CtorLookup { constexpr C(C&); }; constexpr C::C(const C&) = default; - constexpr C::C(C&) = default; // expected-error {{not constexpr}} + constexpr C::C(C&) = default; // expected-error {{cannot be marked constexpr}} } namespace PR14503 { diff --git a/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp b/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp index 5b525fc91aba1c..849594307390f4 100644 --- a/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp +++ b/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp @@ -3,7 +3,7 @@ // An explicitly-defaulted function may be declared constexpr only if it would // have been implicitly declared as constexpr. 
struct S1 { - constexpr S1() = default; // expected-error {{defaulted definition of default constructor is not constexpr}} + constexpr S1() = default; // expected-error {{defaulted definition of default constructor cannot be marked constexpr}} constexpr S1(const S1&) = default; constexpr S1(S1&&) = default; constexpr S1 &operator=(const S1&) const = default; // expected-error {{explicitly-defaulted copy assignment operator may not have}} @@ -18,8 +18,8 @@ struct NoCopyMove { }; struct S2 { constexpr S2() = default; - constexpr S2(const S2&) = default; // expected-error {{defaulted definition of copy constructor is not constexpr}} - constexpr S2(S2&&) = default; // expected-error {{defaulted definition of move constructor is not constexpr}} + constexpr S2(const S2&) = default; // expected-error {{defaulted definition of copy constructor cannot be marked constexpr}} + constexpr S2(S2&&) = default; // expected-error {{defaulted definition of move constructor cannot be marked}} NoCopyMove ncm; }; diff --git a/clang/test/CXX/drs/dr13xx.cpp b/clang/test/CXX/drs/dr13xx.cpp index effdc53040d0b0..d8e3b5d87bd149 100644 --- a/clang/test/CXX/drs/dr13xx.cpp +++ b/clang/test/CXX/drs/dr13xx.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -std=c++98 %s -verify=expected,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx11-17,cxx11-14,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-17,cxx11-14,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-17,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 %s 
-verify=expected,cxx11-20,cxx11-17,cxx11-14,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-20,cxx11-17,cxx11-14,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-20,cxx11-17,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 %s -verify=expected,cxx11-20,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++2c %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors @@ -485,11 +485,11 @@ namespace dr1358 { // dr1358: 3.1 struct B : Virt { int member; constexpr B(NonLit u) : member(u) {} - // since-cxx11-error@-1 {{constexpr constructor's 1st parameter type 'NonLit' is not a literal type}} - // since-cxx11-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr constructor's 1st parameter type 'NonLit' is not a literal type}} + // cxx11-20-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} constexpr NonLit f(NonLit u) const { return NonLit(); } - // since-cxx11-error@-1 {{constexpr function's return type 'NonLit' is not a literal type}} - // since-cxx11-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr function's return type 'NonLit' is not a literal type}} + // cxx11-20-note@#dr1358-NonLit {{'NonLit' is not literal 
because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} }; #endif } @@ -498,13 +498,13 @@ namespace dr1359 { // dr1359: 3.5 #if __cplusplus >= 201103L union A { constexpr A() = default; }; union B { constexpr B() = default; int a; }; // #dr1359-B - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr before C++23}} union C { constexpr C() = default; int a, b; }; // #dr1359-C - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct X { constexpr X() = default; union {}; }; // since-cxx11-error@-1 {{declaration does not declare anything}} struct Y { constexpr Y() = default; union { int a; }; }; // #dr1359-Y - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} constexpr A a = A(); constexpr B b = B(); diff --git a/clang/test/CXX/drs/dr14xx.cpp b/clang/test/CXX/drs/dr14xx.cpp index 58a2b3a0d0275d..ed6dda731fd518 100644 --- a/clang/test/CXX/drs/dr14xx.cpp +++ b/clang/test/CXX/drs/dr14xx.cpp @@ -153,16 +153,16 @@ namespace dr1460 { // dr1460: 3.5 namespace Defaulted { union A { constexpr A() = default; }; union B { int n; constexpr B() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} union C { int n = 0; constexpr C() = default; }; struct D { union {}; constexpr D() = default; }; // expected-error@-1 {{declaration does not declare anything}} struct E { union { int n; }; constexpr E() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + 
// cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct F { union { int n = 0; }; constexpr F() = default; }; struct G { union { int n = 0; }; union { int m; }; constexpr G() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct H { union { int n = 0; diff --git a/clang/test/CXX/drs/dr15xx.cpp b/clang/test/CXX/drs/dr15xx.cpp index ac503db625ba0e..195c0fa610d579 100644 --- a/clang/test/CXX/drs/dr15xx.cpp +++ b/clang/test/CXX/drs/dr15xx.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx11-14,cxx14-17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx11,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s 
-verify=expected,cxx11-20,since-cxx11,cxx11-14,cxx14-17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors namespace dr1512 { // dr1512: 4 void f(char *p) { @@ -407,7 +407,7 @@ namespace dr1573 { // dr1573: 3.9 B b(1, 'x', 4.0, "hello"); // ok // inherited constructor is effectively constexpr if the user-written constructor would be - struct C { C(); constexpr C(int) {} }; + struct C { C(); constexpr C(int) {} }; // #dr1573-C struct D : C { using C::C; }; constexpr D d = D(0); // ok struct E : C { using C::C; A a; }; // #dr1573-E @@ -420,8 +420,11 @@ namespace dr1573 { // dr1573: 3.9 struct F : C { using C::C; C c; }; // #dr1573-F constexpr F f = F(0); // since-cxx11-error@-1 {{constexpr variable 'f' must be initialized by a constant expression}} - // since-cxx11-note@-2 {{constructor inherited from base class 'C' cannot be used in a constant expression; derived class cannot be implicitly initialized}} - // since-cxx11-note@#dr1573-F {{declared here}} + // cxx11-20-note@-2 {{constructor inherited from base class 'C' cannot be used in a constant expression; derived class cannot be implicitly initialized}} + // since-cxx23-note@-3 {{in implicit initialization for inherited constructor of 'F'}} + // since-cxx23-note@#dr1573-F {{non-constexpr constructor 'C' cannot be used in a constant 
expression}} + // cxx11-20-note@#dr1573-F {{declared here}} + // since-cxx23-note@#dr1573-C {{declared here}} // inherited constructor is effectively deleted if the user-written constructor would be struct G { G(int); }; diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp index 2dd7d1502e59fb..766c90d3bc7bda 100644 --- a/clang/test/CXX/drs/dr16xx.cpp +++ b/clang/test/CXX/drs/dr16xx.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s 
-verify=expected,cxx11-20,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors #if __cplusplus == 199711L #define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) @@ -256,12 +256,12 @@ namespace dr1658 { // dr1658: 5 struct A { A(A&); }; struct B : virtual A { virtual void f() = 0; }; struct C : virtual A { virtual void f(); }; - struct D : A { virtual void f() = 0; }; + struct D : A { virtual void f() = 0; }; // since-cxx23-note {{previous declaration is here}} struct X { friend B::B(const B&) throw(); friend C::C(C&); - friend D::D(D&); + friend D::D(D&); // since-cxx23-error {{non-constexpr declaration of 'D' follows constexpr declaration}} }; } @@ -350,8 +350,8 @@ namespace dr1684 { // dr1684: 3.6 }; constexpr int f(NonLiteral &) { return 0; } constexpr int f(NonLiteral) { return 0; } - // since-cxx11-error@-1 {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} - // since-cxx11-note@#dr1684-struct {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} + // cxx11-20-note@#dr1684-struct {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} #endif } diff --git 
a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp index b35d3051ab554c..190e05784f32be 100644 --- a/clang/test/CXX/drs/dr6xx.cpp +++ b/clang/test/CXX/drs/dr6xx.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -std=c++98 %s -verify=expected,cxx98-17,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx98-17,cxx11-17,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx98-17,cxx11-17,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx98-17,cxx11-17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++20 %s -verify=expected,cxx11-20,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking namespace dr600 { // dr600: 2.8 @@ -584,8 +584,8 @@ namespace dr647 { // dr647: 3.1 struct C { constexpr C(NonLiteral); constexpr C(NonLiteral, int) {} - // since-cxx11-error@-1 {{constexpr constructor's 1st parameter type 'NonLiteral' is not a literal type}} - // since-cxx11-note@#dr647-NonLiteral 
{{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr constructor's 1st parameter type 'NonLiteral' is not a literal type}} + // cxx11-20-note@#dr647-NonLiteral {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} constexpr C() try {} catch (...) {} // cxx11-17-error@-1 {{function try block in constexpr constructor is a C++20 extension}} // cxx11-error@-2 {{use of this statement in a constexpr constructor is a C++14 extension}} @@ -609,15 +609,15 @@ namespace dr647 { // dr647: 3.1 d(0) {} constexpr E(int) - // since-cxx11-error@-1 {{constexpr constructor never produces a constant expression}} - // since-cxx11-note@#dr647-int-d {{non-constexpr constructor 'D' cannot be used in a constant expression}} - // since-cxx11-note@#dr647-D-float-ctor {{declared here}} + // cxx11-20-error@-1 {{constexpr constructor never produces a constant expression}} + // cxx11-20-note@#dr647-int-d {{non-constexpr constructor 'D' cannot be used in a constant expression}} + // cxx11-20-note@#dr647-D-float-ctor {{declared here}} : n(0), d(0.0f) {} // #dr647-int-d constexpr E(float f) - // since-cxx11-error@-1 {{never produces a constant expression}} - // since-cxx11-note@#dr647-float-d {{non-constexpr constructor}} - // since-cxx11-note@#dr647-D-float-ctor {{declared here}} + // cxx11-20-error@-1 {{never produces a constant expression}} + // cxx11-20-note@#dr647-float-d {{non-constexpr constructor}} + // cxx11-20-note@#dr647-D-float-ctor {{declared here}} : n(get()), d(D(0) + f) {} // #dr647-float-d }; diff --git a/clang/test/CXX/expr/expr.const/p5-26.cpp b/clang/test/CXX/expr/expr.const/p5-26.cpp index de2afa71b42669..3624b1e5a3e3df 100644 --- a/clang/test/CXX/expr/expr.const/p5-26.cpp +++ b/clang/test/CXX/expr/expr.const/p5-26.cpp @@ -5,11 +5,11 @@ struct S {}; struct T : S {} t; -consteval void 
test() { // cxx23-error{{consteval function never produces a constant expression}} +consteval void test() { void* a = &t; const void* b = &t; volatile void* c = &t; - (void)static_cast(a); //cxx23-note {{cast from 'void *' is not allowed in a constant expression in C++ standards before C++2c}} + (void)static_cast(a); (void)static_cast(a); (void)static_cast(a); diff --git a/clang/test/CXX/special/class.copy/p13-0x.cpp b/clang/test/CXX/special/class.copy/p13-0x.cpp index 16c8a4029cbac6..013d5b56582380 100644 --- a/clang/test/CXX/special/class.copy/p13-0x.cpp +++ b/clang/test/CXX/special/class.copy/p13-0x.cpp @@ -125,7 +125,7 @@ namespace Mutable { mutable A a; }; struct C { - constexpr C(const C &) = default; // expected-error {{not constexpr}} + constexpr C(const C &) = default; // expected-error {{cannot be marked constexpr}} A a; }; } diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index 9e2ae07cbe4c9c..efb391ba0922d8 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -1273,8 +1273,8 @@ namespace PR11595 { struct B { B(); A& x; }; static_assert(B().x == 3, ""); // expected-error {{constant expression}} expected-note {{non-literal type 'B' cannot be used in a constant expression}} - constexpr bool f(int k) { // expected-error {{constexpr function never produces a constant expression}} - return B().x == k; // expected-note {{non-literal type 'B' cannot be used in a constant expression}} + constexpr bool f(int k) { // cxx11_20-error {{constexpr function never produces a constant expression}} + return B().x == k; // cxx11_20-note {{non-literal type 'B' cannot be used in a constant expression}} } } @@ -1326,8 +1326,8 @@ namespace ExternConstexpr { constexpr int g() { return q; } // expected-note {{outside its lifetime}} constexpr int q = g(); // expected-error {{constant expression}} expected-note {{in call}} - extern int r; // 
expected-note {{here}} - constexpr int h() { return r; } // expected-error {{never produces a constant}} expected-note {{read of non-const}} + extern int r; // cxx11_20-note {{here}} + constexpr int h() { return r; } // cxx11_20-error {{never produces a constant}} cxx11_20-note {{read of non-const}} struct S { int n; }; extern const S s; @@ -1678,7 +1678,7 @@ namespace ImplicitConstexpr { struct R { constexpr R() noexcept; constexpr R(const R&) noexcept; constexpr R(R&&) noexcept; ~R() noexcept; }; struct S { R r; }; // expected-note 3{{here}} struct T { T(const T&) noexcept; T(T &&) noexcept; ~T() noexcept; }; - struct U { T t; }; // expected-note 3{{here}} + struct U { T t; }; // cxx11_20-note 3{{here}} static_assert(!__is_literal_type(Q), ""); static_assert(!__is_literal_type(R), ""); static_assert(!__is_literal_type(S), ""); @@ -1691,9 +1691,9 @@ namespace ImplicitConstexpr { friend S::S() noexcept; // expected-error {{follows constexpr}} friend S::S(S&&) noexcept; // expected-error {{follows constexpr}} friend S::S(const S&) noexcept; // expected-error {{follows constexpr}} - friend constexpr U::U() noexcept; // expected-error {{follows non-constexpr}} - friend constexpr U::U(U&&) noexcept; // expected-error {{follows non-constexpr}} - friend constexpr U::U(const U&) noexcept; // expected-error {{follows non-constexpr}} + friend constexpr U::U() noexcept; // cxx11_20-error {{follows non-constexpr}} + friend constexpr U::U(U&&) noexcept; // cxx11_20-error {{follows non-constexpr}} + friend constexpr U::U(const U&) noexcept; // cxx11_20-error {{follows non-constexpr}} }; } @@ -1906,9 +1906,9 @@ namespace StmtExpr { }); } static_assert(g(123) == 15129, ""); - constexpr int h() { // expected-error {{never produces a constant}} + constexpr int h() { // cxx11_20-error {{never produces a constant}} return ({ // expected-warning {{extension}} - return 0; // expected-note {{not supported}} + return 0; // cxx11_20-note {{not supported}} 1; }); } @@ -2093,8 +2093,8 @@ 
namespace ZeroSizeTypes { // expected-note@-2 {{subtraction of pointers to type 'int[0]' of zero size}} int arr[5][0]; - constexpr int f() { // expected-error {{never produces a constant expression}} - return &arr[3] - &arr[0]; // expected-note {{subtraction of pointers to type 'int[0]' of zero size}} + constexpr int f() { // cxx11_20-error {{never produces a constant expression}} + return &arr[3] - &arr[0]; // cxx11_20-note {{subtraction of pointers to type 'int[0]' of zero size}} } } @@ -2118,8 +2118,8 @@ namespace NeverConstantTwoWays { // If we see something non-constant but foldable followed by something // non-constant and not foldable, we want the first diagnostic, not the // second. - constexpr int f(int n) { // expected-error {{never produces a constant expression}} - return (int *)(long)&n == &n ? // expected-note {{reinterpret_cast}} + constexpr int f(int n) { // cxx11_20-error {{never produces a constant expression}} + return (int *)(long)&n == &n ? // cxx11_20-note {{reinterpret_cast}} 1 / 0 : // expected-warning {{division by zero}} 0; } @@ -2277,7 +2277,8 @@ namespace InheritedCtor { struct A { constexpr A(int) {} }; struct B : A { int n; using A::A; }; // expected-note {{here}} - constexpr B b(0); // expected-error {{constant expression}} expected-note {{derived class}} + constexpr B b(0); // expected-error {{constant expression}} cxx11_20-note {{derived class}}\ + // cxx23-note {{not initialized}} struct C : A { using A::A; struct { union { int n, m = 0; }; union { int a = 0; }; int k = 0; }; struct {}; union {}; }; // expected-warning 6{{}} constexpr C c(0); @@ -2316,10 +2317,11 @@ namespace InheritedCtor { namespace PR28366 { namespace ns1 { -void f(char c) { //expected-note2{{declared here}} +void f(char c) { //expected-note{{declared here}} + //cxx11_20-note@-1{{declared here}} struct X { - static constexpr char f() { //expected-error{{never produces a constant expression}} - return c; //expected-error{{reference to local}} 
expected-note{{function parameter}} + static constexpr char f() { // cxx11_20-error {{never produces a constant expression}} + return c; //expected-error{{reference to local}} cxx11_20-note{{function parameter}} } }; int I = X::f(); diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp index 273d7ff3a208e2..80a7a2dd31531c 100644 --- a/clang/test/SemaCXX/constant-expression-cxx14.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp @@ -44,13 +44,13 @@ constexpr int g(int k) { return 3 * k3 + 5 * k2 + n * k - 20; } static_assert(g(2) == 42, ""); -constexpr int h(int n) { // expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr int h(int n) { // cxx14_20-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx14_20-note {{control flows through the definition of a static variable}} \ // cxx14_20-warning {{definition of a static variable in a constexpr function is a C++23 extension}} return m; } -constexpr int i(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int i(int n) { // cxx14_20-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx14_20-note {{control flows through the definition of a thread_local variable}} \ // cxx14_20-warning {{definition of a thread_local variable in a constexpr function is a C++23 extension}} return m; } @@ -68,6 +68,7 @@ constexpr int j(int k) { } } } // expected-note 2{{control reached end of constexpr function}} + // cxx23-warning@-1 {{does not return a value in all control paths}} static_assert(j(0) == -3, ""); static_assert(j(1) == 5, ""); static_assert(j(2), ""); // 
expected-error {{constant expression}} expected-note {{in call to 'j(2)'}} @@ -104,10 +105,10 @@ static_assert(l(false) == 5, ""); static_assert(l(true), ""); // expected-error {{constant expression}} expected-note {{in call to 'l(true)'}} // Potential constant expression checking is still applied where possible. -constexpr int htonl(int x) { // expected-error {{never produces a constant expression}} +constexpr int htonl(int x) { // cxx14_20-error {{never produces a constant expression}} typedef unsigned char uchar; uchar arr[4] = { uchar(x >> 24), uchar(x >> 16), uchar(x >> 8), uchar(x) }; - return *reinterpret_cast(arr); // expected-note {{reinterpret_cast is not allowed in a constant expression}} + return *reinterpret_cast(arr); // cxx14_20-note {{reinterpret_cast is not allowed in a constant expression}} } constexpr int maybe_htonl(bool isBigEndian, int x) { @@ -183,7 +184,7 @@ namespace string_assign { static_assert(!test1(100), ""); static_assert(!test1(101), ""); // expected-error {{constant expression}} expected-note {{in call to 'test1(101)'}} - constexpr void f() { // expected-error{{constexpr function never produces a constant expression}} expected-note@+2{{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} + constexpr void f() { // cxx14_20-error{{constexpr function never produces a constant expression}} cxx14_20-note@+2{{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} char foo[10] = { "z" }; // expected-note {{here}} foo[10] = 'x'; // expected-warning {{past the end}} } @@ -207,14 +208,14 @@ namespace array_resize { namespace potential_const_expr { constexpr void set(int &n) { n = 1; } constexpr int div_zero_1() { int z = 0; set(z); return 100 / z; } // no error - constexpr int div_zero_2() { // expected-error {{never produces a constant expression}} + constexpr int div_zero_2() { // cxx14_20-error {{never produces a constant expression}} int z = 0; - return 100 / 
(set(z), 0); // expected-note {{division by zero}} + return 100 / (set(z), 0); // cxx14_20-note {{division by zero}} } - int n; // expected-note {{declared here}} - constexpr int ref() { // expected-error {{never produces a constant expression}} + int n; // cxx14_20-note {{declared here}} + constexpr int ref() { // cxx14_20-error {{never produces a constant expression}} int &r = n; - return r; // expected-note {{read of non-const variable 'n'}} + return r; // cxx14_20-note {{read of non-const variable 'n'}} } } @@ -846,8 +847,8 @@ namespace StmtExpr { static_assert(g() == 0, ""); // expected-error {{constant expression}} expected-note {{in call}} // FIXME: We should handle the void statement expression case. - constexpr int h() { // expected-error {{never produces a constant}} - ({ if (true) {} }); // expected-note {{not supported}} + constexpr int h() { // cxx14_20-error {{never produces a constant}} + ({ if (true) {} }); // cxx14_20-note {{not supported}} return 0; } } @@ -1043,9 +1044,9 @@ static_assert(sum(Cs) == 'a' + 'b', ""); // expected-error{{not an integral cons constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant expression}} expected-note{{in call}} } -constexpr void PR28739(int n) { // expected-error {{never produces a constant}} +constexpr void PR28739(int n) { // cxx14_20-error {{never produces a constant}} int *p = &n; // expected-note {{array 'p' declared here}} - p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} + p += (__int128)(unsigned long)-1; // cxx14_20-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } diff --git 
a/clang/test/SemaCXX/constant-expression-cxx2b.cpp b/clang/test/SemaCXX/constant-expression-cxx2b.cpp index 2ee1d48d1cd697..2519839b7ac578 100644 --- a/clang/test/SemaCXX/constant-expression-cxx2b.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx2b.cpp @@ -10,36 +10,36 @@ struct Constexpr{}; #if __cplusplus > 202002L -constexpr int f(int n) { // expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr int f(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx2a-note {{control flows through the definition of a static variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int g(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int g(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a thread_local variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int c_thread_local(int n) { // expected-error {{constexpr function never produces a constant expression}} - static _Thread_local int m = 0; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int c_thread_local(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static _Thread_local int m = 0; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a static variable in a constexpr 
function is incompatible with C++ standards before C++23}} return m; } -constexpr int gnu_thread_local(int n) { // expected-error {{constexpr function never produces a constant expression}} - static __thread int m = 0; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int gnu_thread_local(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static __thread int m = 0; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int h(int n) { // expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr int h(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx2a-note {{control flows through the definition of a static variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return &m - &m; } -constexpr int i(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int i(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a thread_local variable in a constexpr function is incompatible with C++ standards before C++23}} return &m - &m; } diff --git a/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp b/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp new file mode 100644 index 00000000000000..4dc16c59d8058d --- 
/dev/null +++ b/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp @@ -0,0 +1,159 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=expected -std=c++23 %s + +// This test covers modifications made by P2448R2. + +// Check that there is no error when a constexpr function that never produces a +// constant expression, but still an error if such function is called from +// constexpr context. +constexpr int F(int N) { + double D = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +constexpr int F0(int N) { + if (N == 0) + double d2 = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +template +constexpr int FT(T N) { + double D = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +class NonLiteral { // expected-note {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors}} +public: + NonLiteral() {} + ~NonLiteral() {} +}; + +constexpr NonLiteral F1() { + return NonLiteral{}; +} + +constexpr int F2(NonLiteral N) { + return 8; +} + +class Derived : public NonLiteral { + constexpr ~Derived() {}; +}; + +class Derived1 : public NonLiteral { + constexpr Derived1() : NonLiteral () {} +}; + + +struct X { + X(); + X(const X&); + X(X&&); + X& operator=(X&); + X& operator=(X&& other); + bool operator==(X const&) const; +}; + +template +struct Wrapper { + constexpr Wrapper() = default; + constexpr Wrapper(Wrapper const&) = default; + constexpr Wrapper(T const& t) : t(t) { } + constexpr Wrapper(Wrapper &&) = default; + constexpr X get() const { return t; } + constexpr bool operator==(Wrapper const&) const = default; + private: + T t; +}; + +struct WrapperNonT { + constexpr WrapperNonT() = default; + constexpr WrapperNonT(WrapperNonT const&) = default; + constexpr WrapperNonT(X const& t) : t(t) { } + constexpr WrapperNonT(WrapperNonT &&) = default; + constexpr WrapperNonT& operator=(WrapperNonT &) = default; + constexpr WrapperNonT& operator=(WrapperNonT&& other) = default; + constexpr X get() const { return 
t; } + constexpr bool operator==(WrapperNonT const&) const = default; + private: + X t; +}; + +struct NonDefaultMembers { + constexpr NonDefaultMembers() {}; // expected-note {{non-literal type 'X' cannot be used in a constant expression}} + constexpr NonDefaultMembers(NonDefaultMembers const&) {}; + constexpr NonDefaultMembers(NonDefaultMembers &&) {}; + constexpr NonDefaultMembers& operator=(NonDefaultMembers &other) {this->t = other.t; return *this;} + constexpr NonDefaultMembers& operator=(NonDefaultMembers&& other) {this->t = other.t; return *this;} + constexpr bool operator==(NonDefaultMembers const& other) const {return this->t == other.t;} + X t; +}; + +int Glob = 0; +class C1 { +public: + constexpr C1() : D(Glob) {}; +private: + int D; +}; + +void test() { + + constexpr int A = F(3); // expected-error {{constexpr variable 'A' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F(3); + constexpr int B = F0(0); // expected-error {{constexpr variable 'B' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F0(0); + constexpr auto C = F1(); // expected-error {{constexpr variable cannot have non-literal type 'const NonLiteral'}} + F1(); + NonLiteral L; + constexpr auto D = F2(L); // expected-error {{constexpr variable 'D' must be initialized by a constant expression}} + // expected-note@-1 {{non-literal type 'NonLiteral' cannot be used in a constant expression}} + + constexpr auto E = FT(1); // expected-error {{constexpr variable 'E' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F2(L); + + Wrapper x; + WrapperNonT x1; + NonDefaultMembers x2; + + // TODO these produce notes with an invalid source location. 
+ // static_assert((Wrapper(), true)); + // static_assert((WrapperNonT(), true),""); + + static_assert((NonDefaultMembers(), true),""); // expected-error{{expression is not an integral constant expression}} \ + // expected-note {{in call to}} + constexpr bool FFF = (NonDefaultMembers() == NonDefaultMembers()); // expected-error{{must be initialized by a constant expression}} \ + // expected-note{{non-literal}} +} + +struct A { + A (); + ~A(); +}; + +template +struct opt +{ + union { + char c; + T data; + }; + + constexpr opt() {} + + constexpr ~opt() { + if (engaged) + data.~T(); + } + + bool engaged = false; +}; + +consteval void foo() { + opt a; +} + +void bar() { foo(); } diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp index d8482ec53f0ed4..192621225a543c 100644 --- a/clang/test/SemaCXX/cxx2a-consteval.cpp +++ b/clang/test/SemaCXX/cxx2a-consteval.cpp @@ -54,7 +54,7 @@ struct C { struct D { C c; - consteval D() = default; // expected-error {{cannot be consteval}} + consteval D() = default; // expected-error {{cannot be marked consteval}} consteval ~D() = default; // expected-error {{destructor cannot be declared consteval}} }; diff --git a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp index 415bbbf1a0bc50..431d77ca785b8e 100644 --- a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp +++ b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx20_23,cxx23 %s // RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx20_23,cxx23 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -fdelayed-template-parsing 
-DDELAYED_TEMPLATE_PARSING +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx20,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx20,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING // RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=expected,since-cxx14,cxx14_20,cxx14 %s // RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=expected,since-cxx14,cxx14_20,cxx14 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING @@ -299,8 +299,8 @@ namespace Constexpr { constexpr int q = Y().f(); // expected-error {{must be initialized by a constant expression}} expected-note {{in call to 'Y().f()'}} } struct NonLiteral { ~NonLiteral(); } nl; // cxx14-note {{user-provided destructor}} - // cxx20_23-note@-1 {{'NonLiteral' is not literal because its destructor is not constexpr}} - constexpr auto f2(int n) { return nl; } // expected-error {{return type 'struct NonLiteral' is not a literal type}} + // cxx20-note@-1 {{'NonLiteral' is not literal because its destructor is not constexpr}} + constexpr auto f2(int n) { return nl; } // cxx14_20-error {{constexpr function's return type 'struct NonLiteral' is not a literal type}} } // It's not really clear whether these are valid, but this matches g++. 
diff --git a/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp b/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp index 1b97484767b1a5..067a404c489aa6 100644 --- a/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp +++ b/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp @@ -54,5 +54,5 @@ struct Z { struct W { int w; - constexpr W() __constant = default; // expected-error {{defaulted definition of default constructor is not constexpr}} + constexpr W() __constant = default; // expected-error {{defaulted definition of default constructor cannot be marked constexpr}} }; diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 421b3426b006f9..5ed27cdd43b368 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -356,14 +356,7 @@

C++23 implementation status

Relaxing some constexpr restrictions
P2448R2 - -
Clang 17 (Partial) - We do not support outside of defaulted special memeber functions the change that constexpr functions no - longer have to be constexpr compatible but rather support a less restricted requirements for constexpr - functions. Which include allowing non-literal types as return values and parameters, allow calling of - non-constexpr functions and constructors. -
- + Clang 19 Using unknown pointers and references in constant expressions From c59129a7c79448837d665de8f2743ad4b14666f6 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 7 Mar 2024 16:50:26 +0800 Subject: [PATCH 006/158] [RISCV] Recursively split concat_vector into smaller LMULs (#83035) This is the concat_vector equivalent of #81312, in that we recursively split concat_vectors with more than two operands into smaller concat_vectors. This allows us to break up the chain of vslideups, as well as perform the vslideups at a smaller LMUL, which in turn reduces register pressure as the previous lowering performed N vslideups at the highest result LMUL. For now, it stops splitting past MF2. This is done as a DAG combine so that any undef operands are combined away: If we do this during lowering then we end up with unnecessary vslideups of undefs. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 60 +- .../CodeGen/RISCV/rvv/active_lane_mask.ll | 91 +- .../RISCV/rvv/combine-store-extract-crash.ll | 4 +- .../CodeGen/RISCV/rvv/extract-subvector.ll | 3 +- .../RISCV/rvv/fixed-vectors-shuffle-concat.ll | 307 ++--- .../rvv/fixed-vectors-strided-load-combine.ll | 126 +- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 1066 ++++++++++------- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 13 +- llvm/test/CodeGen/RISCV/rvv/pr63596.ll | 37 +- 9 files changed, 926 insertions(+), 781 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4c3dc63afd878d..750d70c03eabd7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15283,13 +15283,62 @@ static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } +// Recursively split up concat_vectors with more than 2 operands: +// +// concat_vector op1, op2, op3, op4 +// -> +// concat_vector (concat_vector op1, op2), (concat_vector op3, op4) +// 
+// This reduces the length of the chain of vslideups and allows us to perform +// the vslideups at a smaller LMUL, limited to MF2. +// +// We do this as a DAG combine rather than during lowering so that any undef +// operands can get combined away. +static SDValue +performCONCAT_VECTORSSplitCombine(SDNode *N, SelectionDAG &DAG, + const RISCVTargetLowering &TLI) { + SDLoc DL(N); + + if (N->getNumOperands() <= 2) + return SDValue(); + + if (!TLI.isTypeLegal(N->getValueType(0))) + return SDValue(); + MVT VT = N->getSimpleValueType(0); + + // Don't split any further than MF2. + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) + ContainerVT = getContainerForFixedLengthVector(DAG, VT, TLI.getSubtarget()); + if (ContainerVT.bitsLT(getLMUL1VT(ContainerVT))) + return SDValue(); + + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + assert(isPowerOf2_32(N->getNumOperands())); + size_t HalfNumOps = N->getNumOperands() / 2; + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + N->ops().take_front(HalfNumOps)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + N->ops().drop_front(HalfNumOps)); + + // Lower to an insert_subvector directly so the concat_vectors don't get + // recombined. + SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Lo, + DAG.getVectorIdxConstant(0, DL)); + Vec = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, VT, Vec, Hi, + DAG.getVectorIdxConstant(HalfVT.getVectorMinNumElements(), DL)); + return Vec; +} + // If we're concatenating a series of vector loads like // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 
// Then we can turn this into a strided load by widening the vector elements // vlse32 p, stride=n -static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, - const RISCVTargetLowering &TLI) { +static SDValue +performCONCAT_VECTORSStridedLoadCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + const RISCVTargetLowering &TLI) { SDLoc DL(N); EVT VT = N->getValueType(0); @@ -16394,7 +16443,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case ISD::CONCAT_VECTORS: - if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) + if (SDValue V = + performCONCAT_VECTORSStridedLoadCombine(N, DAG, Subtarget, *this)) + return V; + if (SDValue V = performCONCAT_VECTORSSplitCombine(N, DAG, *this)) return V; break; case ISD::INSERT_VECTOR_ELT: diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 87d95d7596d4fa..139579b3d2a361 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -161,72 +161,71 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv128: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_1) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: 
vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 6 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 10, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 8 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 6 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 12, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi 
v0, v16, 10 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v9, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 14, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 12 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 14 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vi v0, v8, 8 ; CHECK-NEXT: ret %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc) ret <128 x i1> %mask diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll index c64216180c2af7..ed434deea1a837 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll @@ -19,7 +19,7 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV32-NEXT: th.swia a0, (a1), 4, 0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v10, (a3) -; RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vi v10, v9, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vzext.vf4 v12, v10 @@ -42,7 +42,7 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV64-NEXT: th.swia a0, (a1), 4, 0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v10, (a3) -; RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslideup.vi v10, v9, 4 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vzext.vf4 v12, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll index 76aa2b913c6525..e15e6452163b1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -469,9 +469,8 @@ define @extract_nxv6f16_nxv12f16_6( %in) ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v13, v10, a0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v13, v10, a0 ; CHECK-NEXT: vslidedown.vx v12, v9, a0 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll index e5bef20fd9e24d..8474f95edd813f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll @@ -5,6 +5,59 @@ ; RUN: llc < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck 
-check-prefixes=CHECK,VLS %s ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s +define <8 x i16> @concat_2xv4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: concat_2xv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %ab = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> + ret <8 x i16> %ab +} + +define <8 x i16> @concat_4xv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) { +; CHECK-LABEL: concat_4xv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: ret + %ab = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> + %cd = shufflevector <2 x i16> %c, <2 x i16> %d, <4 x i32> + %abcd = shufflevector <4 x i16> %ab, <4 x i16> %cd, <8 x i32> + ret <8 x i16> %abcd +} + +define <8 x i16> @concat_8xv1i16(<1 x i16> %a, <1 x i16> %b, <1 x i16> %c, <1 x i16> %d, <1 x i16> %e, <1 x i16> %f, <1 x i16> %g, <1 x i16> %h) { +; CHECK-LABEL: concat_8xv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v12, v13, 1 +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v12, v14, 2 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v15, 3 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 3 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 4 +; CHECK-NEXT: ret + %ab = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> + %cd = shufflevector <1 x i16> %c, <1 x i16> %d, 
<2 x i32> + %abcd = shufflevector <2 x i16> %ab, <2 x i16> %cd, <4 x i32> + %ef = shufflevector <1 x i16> %e, <1 x i16> %f, <2 x i32> + %gh = shufflevector <1 x i16> %g, <1 x i16> %h, <2 x i32> + %efgh = shufflevector <2 x i16> %ef, <2 x i16> %gh, <4 x i32> + %abcdefgh = shufflevector <4 x i16> %abcd, <4 x i16> %efgh, <8 x i32> + ret <8 x i16> %abcdefgh +} + define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: concat_2xv4i32: ; CHECK: # %bb.0: @@ -19,14 +72,11 @@ define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) { ; CHECK-LABEL: concat_4xv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v11 -; CHECK-NEXT: vmv1r.v v14, v9 -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v14, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: ret %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> @@ -37,24 +87,18 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) { ; CHECK-LABEL: concat_8xv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v15 -; CHECK-NEXT: vmv1r.v v18, v13 -; CHECK-NEXT: vmv1r.v v20, v11 -; CHECK-NEXT: vmv1r.v v22, v9 -; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v22, 1 -; CHECK-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v14, v15, 1 +; CHECK-NEXT: vslideup.vi v12, 
v13, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 1 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 3 -; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 4 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v18, 5 -; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v14, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 7 +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %ab = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> %cd = shufflevector <1 x i32> %c, <1 x i32> %d, <2 x i32> @@ -80,15 +124,14 @@ define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: concat_4xv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v11 -; CHECK-NEXT: vmv1r.v v16, v10 -; CHECK-NEXT: vmv1r.v v20, v9 -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 4 -; CHECK-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v16, 8 +; CHECK-NEXT: vmv1r.v v14, v11 +; CHECK-NEXT: vmv1r.v v12, v10 +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 4 +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 12 +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> @@ -99,26 +142,18 @@ define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define 
<16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) { ; CHECK-LABEL: concat_8xv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v15 -; CHECK-NEXT: vmv1r.v v20, v14 -; CHECK-NEXT: vmv1r.v v24, v13 -; CHECK-NEXT: vmv1r.v v28, v11 -; CHECK-NEXT: vmv1r.v v4, v10 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v0, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v4, 4 -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v28, 6 -; CHECK-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v24, 10 -; CHECK-NEXT: vsetivli zero, 14, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 12 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v14, v15, 2 +; CHECK-NEXT: vslideup.vi v12, v13, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 14 +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> @@ -152,29 +187,27 @@ define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) { define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) { ; VLA-LABEL: concat_4xv8i32: ; VLA: # %bb.0: -; VLA-NEXT: vmv2r.v v16, v14 -; VLA-NEXT: vmv2r.v v24, v12 -; VLA-NEXT: vmv2r.v v0, v10 -; VLA-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v0, 8 -; 
VLA-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v24, 16 +; VLA-NEXT: vmv2r.v v20, v14 +; VLA-NEXT: vmv2r.v v16, v12 +; VLA-NEXT: vmv2r.v v12, v10 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v16, v20, 8 +; VLA-NEXT: vslideup.vi v8, v12, 8 ; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; VLA-NEXT: vslideup.vi v8, v16, 24 +; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret ; ; VLS-LABEL: concat_4xv8i32: ; VLS: # %bb.0: -; VLS-NEXT: vmv2r.v v16, v14 -; VLS-NEXT: vmv2r.v v24, v12 -; VLS-NEXT: vmv2r.v v0, v10 -; VLS-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v0, 8 -; VLS-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v24, 16 +; VLS-NEXT: vmv2r.v v20, v14 +; VLS-NEXT: vmv2r.v v16, v12 +; VLS-NEXT: vmv2r.v v12, v10 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v16, v20, 8 +; VLS-NEXT: vslideup.vi v8, v12, 8 ; VLS-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; VLS-NEXT: vslideup.vi v8, v16, 24 +; VLS-NEXT: vslideup.vi v8, v16, 16 ; VLS-NEXT: ret %ab = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> %cd = shufflevector <8 x i32> %c, <8 x i32> %d, <16 x i32> @@ -185,123 +218,49 @@ define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) { ; VLA-LABEL: concat_8xv4i32: ; VLA: # %bb.0: -; VLA-NEXT: addi sp, sp, -16 -; VLA-NEXT: .cfi_def_cfa_offset 16 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 5 -; VLA-NEXT: sub sp, sp, a0 -; VLA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; VLA-NEXT: vmv1r.v v16, v15 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: mv a1, a0 -; VLA-NEXT: slli a0, a0, 1 -; VLA-NEXT: add a0, a0, a1 -; VLA-NEXT: add a0, sp, a0 -; 
VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v16, v14 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 4 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v16, v13 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; VLA-NEXT: vmv1r.v v18, v15 +; VLA-NEXT: vmv1r.v v20, v14 +; VLA-NEXT: vmv1r.v v22, v13 ; VLA-NEXT: vmv1r.v v16, v12 -; VLA-NEXT: addi a0, sp, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v0, v11 -; VLA-NEXT: vmv1r.v v24, v10 -; VLA-NEXT: vmv1r.v v16, v9 -; VLA-NEXT: vsetivli zero, 8, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v16, 4 -; VLA-NEXT: vsetivli zero, 12, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v24, 8 -; VLA-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v0, 12 -; VLA-NEXT: vsetivli zero, 20, e32, m8, tu, ma -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 16 -; VLA-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 20 -; VLA-NEXT: vsetivli zero, 28, e32, m8, tu, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 4 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 24 +; VLA-NEXT: vmv1r.v v14, v11 +; VLA-NEXT: vmv1r.v v12, v10 +; VLA-NEXT: vmv1r.v v10, v9 +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vslideup.vi v20, v18, 4 +; VLA-NEXT: vslideup.vi v16, v22, 4 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v16, v20, 8 +; VLA-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vslideup.vi v12, v14, 4 +; VLA-NEXT: vslideup.vi v8, v10, 4 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v8, v12, 8 ; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: mv a1, a0 -; VLA-NEXT: slli a0, a0, 1 -; VLA-NEXT: add a0, a0, a1 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 28 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 5 -; VLA-NEXT: add sp, sp, a0 -; VLA-NEXT: addi sp, sp, 16 +; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret ; ; VLS-LABEL: concat_8xv4i32: ; VLS: # %bb.0: -; VLS-NEXT: addi sp, sp, -16 -; VLS-NEXT: .cfi_def_cfa_offset 16 -; VLS-NEXT: addi sp, sp, -512 -; VLS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; VLS-NEXT: vmv1r.v v16, v15 -; VLS-NEXT: addi a0, sp, 400 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v16, v14 -; VLS-NEXT: addi a0, sp, 272 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v16, v13 -; VLS-NEXT: addi a0, sp, 144 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; VLS-NEXT: vmv1r.v v18, v15 +; VLS-NEXT: vmv1r.v v20, v14 +; VLS-NEXT: vmv1r.v v22, v13 ; VLS-NEXT: vmv1r.v v16, v12 -; VLS-NEXT: addi a0, sp, 16 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v0, v11 -; VLS-NEXT: vmv1r.v v24, v10 -; VLS-NEXT: vmv1r.v v16, v9 -; VLS-NEXT: vsetivli zero, 8, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v16, 4 -; VLS-NEXT: vsetivli zero, 12, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v24, 8 -; VLS-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v0, 12 -; VLS-NEXT: vsetivli zero, 20, e32, m8, tu, ma -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 
-; VLS-NEXT: vslideup.vi v8, v16, 16 -; VLS-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLS-NEXT: addi a0, sp, 144 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 20 -; VLS-NEXT: vsetivli zero, 28, e32, m8, tu, ma -; VLS-NEXT: addi a0, sp, 272 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 24 +; VLS-NEXT: vmv1r.v v14, v11 +; VLS-NEXT: vmv1r.v v12, v10 +; VLS-NEXT: vmv1r.v v10, v9 +; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLS-NEXT: vslideup.vi v20, v18, 4 +; VLS-NEXT: vslideup.vi v16, v22, 4 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v16, v20, 8 +; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLS-NEXT: vslideup.vi v12, v14, 4 +; VLS-NEXT: vslideup.vi v8, v10, 4 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v8, v12, 8 ; VLS-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; VLS-NEXT: addi a0, sp, 400 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 28 -; VLS-NEXT: addi sp, sp, 512 -; VLS-NEXT: addi sp, sp, 16 +; VLS-NEXT: vslideup.vi v8, v16, 16 ; VLS-NEXT: ret %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index ba5db552b8544f..37902aa1873215 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -24,17 +24,15 @@ define void @widen_2xv4i16(ptr %x, ptr %z) { define void @widen_3xv4i16(ptr %x, ptr %z) { ; CHECK-LABEL: widen_3xv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, 16 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 8 -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: 
vle16.v v12, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: vle16.v v8, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 12, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v10, (a1) ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 8 @@ -72,20 +70,18 @@ define void @widen_4xv4i16(ptr %x, ptr %z) { define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) { ; CHECK-NO-MISALIGN-LABEL: widen_4xv4i16_unaligned: ; CHECK-NO-MISALIGN: # %bb.0: -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0) -; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 -; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) ; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16 -; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2) +; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) +; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24 -; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0) -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0) +; CHECK-NO-MISALIGN-NEXT: vle8.v v11, (a2) +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v10, v9, 4 +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v11, 4 ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v14, 12 +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 8 ; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1) ; CHECK-NO-MISALIGN-NEXT: ret ; @@ -185,21 +181,14 @@ define void 
@strided_constant_0(ptr %x, ptr %z) { define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) { ; CHECK-LABEL: strided_constant_mismatch_4xv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 2 -; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: addi a2, a0, 6 -; CHECK-NEXT: vle16.v v12, (a2) -; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v14, 12 -; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: li a3, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vlse64.v v8, (a0), a3 +; CHECK-NEXT: vlse64.v v10, (a2), a3 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 2 @@ -255,59 +244,38 @@ define void @strided_runtime_4xv4i16(ptr %x, ptr %z, i64 %s) { define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) { ; RV32-LABEL: strided_runtime_mismatch_4xv4i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vle16.v v10, (a0) -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: vle16.v v12, (a0) -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vle16.v v14, (a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 4 -; RV32-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v12, 8 -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v14, 12 -; RV32-NEXT: vse16.v v8, (a1) +; RV32-NEXT: add a3, a0, a2 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: 
vlse64.v v8, (a0), a2 +; RV32-NEXT: vlse64.v v10, (a3), a2 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vse64.v v8, (a1) ; RV32-NEXT: ret ; ; RV64-LABEL: strided_runtime_mismatch_4xv4i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vle16.v v10, (a0) -; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: vle16.v v12, (a0) -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vle16.v v14, (a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v12, 8 -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v14, 12 -; RV64-NEXT: vse16.v v8, (a1) +; RV64-NEXT: add a4, a0, a2 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vlse64.v v8, (a0), a2 +; RV64-NEXT: vlse64.v v10, (a3), a2 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: ret ; ; ZVE64F-LABEL: strided_runtime_mismatch_4xv4i16: ; ZVE64F: # %bb.0: -; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVE64F-NEXT: vle16.v v8, (a0) -; ZVE64F-NEXT: add a0, a0, a2 -; ZVE64F-NEXT: vle16.v v10, (a0) -; ZVE64F-NEXT: add a0, a0, a3 -; ZVE64F-NEXT: vle16.v v12, (a0) -; ZVE64F-NEXT: add a0, a0, a2 -; ZVE64F-NEXT: vle16.v v14, (a0) -; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v10, 4 -; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v12, 8 -; ZVE64F-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVE64F-NEXT: vslideup.vi v8, v14, 12 -; ZVE64F-NEXT: vse16.v v8, (a1) +; ZVE64F-NEXT: add a4, a0, a2 +; ZVE64F-NEXT: add a3, a4, a3 +; ZVE64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVE64F-NEXT: vlse64.v v8, (a0), a2 +; ZVE64F-NEXT: vlse64.v v10, (a3), a2 +; ZVE64F-NEXT: vsetivli zero, 4, 
e64, m2, ta, ma +; ZVE64F-NEXT: vslideup.vi v8, v10, 2 +; ZVE64F-NEXT: vse64.v v8, (a1) ; ZVE64F-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 %s diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 48ce7d623475cb..cbdabab65cc678 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -441,57 +441,50 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; 
CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -609,57 +602,50 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu 
s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # 
Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -787,60 +773,53 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; 
CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; 
CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -1404,90 +1383,125 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, 
sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 
+; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # 
Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1682,90 +1696,125 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; 
CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, 
m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; 
CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 -; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; 
CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1982,94 +2031,129 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; 
CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; 
CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # 
Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; 
CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -3728,57 +3812,50 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: 
slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -3894,57 +3971,50 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call 
__extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; 
CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -4071,60 +4141,53 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi 
v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -4676,90 +4739,125 @@ define <8 x i16> @stest_f16i16_mm(<8 x 
half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: 
slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add 
a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -4952,90 +5050,125 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # 
Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call 
__extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; 
CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -5251,94 +5384,129 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, 
vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; 
CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; 
CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index f3ae03af7c7868..0b236f6d3ff388 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2136,17 +2136,18 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v0, v16, a1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t +; RV64-NEXT: vslidedown.vx v8, v16, a1 ; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vslidedown.vx v0, v8, a2 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t ; RV64-NEXT: vmv4r.v v8, v12 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll index c27488b18a017a..d13d67fd0a8824 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll @@ -9,39 +9,38 @@ define <4 x float> @foo(ptr %0) nounwind { ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: lhu s0, 6(a0) -; CHECK-NEXT: lhu s1, 4(a0) -; CHECK-NEXT: lhu s2, 0(a0) -; CHECK-NEXT: lhu a0, 2(a0) +; CHECK-NEXT: lhu s0, 0(a0) +; CHECK-NEXT: lhu s1, 2(a0) +; CHECK-NEXT: lhu s2, 4(a0) +; CHECK-NEXT: lhu a0, 6(a0) ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 
8(sp) +; CHECK-NEXT: fsw fa0, 4(sp) ; CHECK-NEXT: fmv.w.x fa0, s2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 0(sp) +; CHECK-NEXT: fsw fa0, 12(sp) ; CHECK-NEXT: fmv.w.x fa0, s1 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 12(sp) +; CHECK-NEXT: fsw fa0, 8(sp) ; CHECK-NEXT: fmv.w.x fa0, s0 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 4(sp) -; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: fsw fa0, 0(sp) +; CHECK-NEXT: addi a0, sp, 4 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, sp, 12 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: addi a0, sp, 4 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload From 7ce1cfed9a11735f0f4ee8a3a8bebfa87ee76d07 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Thu, 7 Mar 2024 01:06:20 -0800 Subject: [PATCH 007/158] [alpha.webkit.UncountedLocalVarsChecker] Allow uncounted object references within trivial statements (#82229) This PR makes alpha.webkit.UncountedLocalVarsChecker ignore raw references and pointers to a ref counted type which appears within "trival" statements. 
To do this, this PR extends TrivialFunctionAnalysis so that it can also analyze "triviality" of statements as well as that of functions Each Visit* function is now augmented with withCachedResult, which is responsible for looking up and updating the cache for each Visit* functions. As this PR dramatically improves the false positive rate of the checker, it also deletes the code to ignore raw pointers and references within if and for statements. --- .../Checkers/WebKit/PtrTypesSemantics.cpp | 76 +++++++++---- .../Checkers/WebKit/PtrTypesSemantics.h | 7 +- .../WebKit/UncountedLocalVarsChecker.cpp | 82 +++++++------- .../Analysis/Checkers/WebKit/mock-types.h | 2 + .../Checkers/WebKit/uncounted-local-vars.cpp | 101 ++++++++++++++++-- 5 files changed, 203 insertions(+), 65 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 01b191ab0eeaf4..287f6a52870056 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -253,6 +253,19 @@ class TrivialFunctionAnalysisVisitor return true; } + template + bool WithCachedResult(const Stmt *S, CheckFunction Function) { + // If the statement isn't in the cache, conservatively assume that + // it's not trivial until analysis completes. Insert false to the cache + // first to avoid infinite recursion. + auto [It, IsNew] = Cache.insert(std::make_pair(S, false)); + if (!IsNew) + return It->second; + bool Result = Function(); + Cache[S] = Result; + return Result; + } + public: using CacheTy = TrivialFunctionAnalysis::CacheTy; @@ -267,7 +280,7 @@ class TrivialFunctionAnalysisVisitor bool VisitCompoundStmt(const CompoundStmt *CS) { // A compound statement is allowed as long each individual sub-statement // is trivial. 
- return VisitChildren(CS); + return WithCachedResult(CS, [&]() { return VisitChildren(CS); }); } bool VisitReturnStmt(const ReturnStmt *RS) { @@ -279,17 +292,36 @@ class TrivialFunctionAnalysisVisitor bool VisitDeclStmt(const DeclStmt *DS) { return VisitChildren(DS); } bool VisitDoStmt(const DoStmt *DS) { return VisitChildren(DS); } - bool VisitIfStmt(const IfStmt *IS) { return VisitChildren(IS); } + bool VisitIfStmt(const IfStmt *IS) { + return WithCachedResult(IS, [&]() { return VisitChildren(IS); }); + } + bool VisitForStmt(const ForStmt *FS) { + return WithCachedResult(FS, [&]() { return VisitChildren(FS); }); + } + bool VisitCXXForRangeStmt(const CXXForRangeStmt *FS) { + return WithCachedResult(FS, [&]() { return VisitChildren(FS); }); + } + bool VisitWhileStmt(const WhileStmt *WS) { + return WithCachedResult(WS, [&]() { return VisitChildren(WS); }); + } bool VisitSwitchStmt(const SwitchStmt *SS) { return VisitChildren(SS); } bool VisitCaseStmt(const CaseStmt *CS) { return VisitChildren(CS); } bool VisitDefaultStmt(const DefaultStmt *DS) { return VisitChildren(DS); } bool VisitUnaryOperator(const UnaryOperator *UO) { // Operator '*' and '!' are allowed as long as the operand is trivial. - if (UO->getOpcode() == UO_Deref || UO->getOpcode() == UO_AddrOf || - UO->getOpcode() == UO_LNot) + auto op = UO->getOpcode(); + if (op == UO_Deref || op == UO_AddrOf || op == UO_LNot) return Visit(UO->getSubExpr()); + if (UO->isIncrementOp() || UO->isDecrementOp()) { + // Allow increment or decrement of a POD type. + if (auto *RefExpr = dyn_cast(UO->getSubExpr())) { + if (auto *Decl = dyn_cast(RefExpr->getDecl())) + return Decl->isLocalVarDeclOrParm() && + Decl->getType().isPODType(Decl->getASTContext()); + } + } // Other operators are non-trivial. 
return false; } @@ -304,22 +336,6 @@ class TrivialFunctionAnalysisVisitor return VisitChildren(CO); } - bool VisitDeclRefExpr(const DeclRefExpr *DRE) { - if (auto *decl = DRE->getDecl()) { - if (isa(decl)) - return true; - if (isa(decl)) - return true; - if (auto *VD = dyn_cast(decl)) { - if (VD->hasConstantInitialization() && VD->getEvaluatedValue()) - return true; - auto *Init = VD->getInit(); - return !Init || Visit(Init); - } - } - return false; - } - bool VisitAtomicExpr(const AtomicExpr *E) { return VisitChildren(E); } bool VisitStaticAssertDecl(const StaticAssertDecl *SAD) { @@ -436,6 +452,11 @@ class TrivialFunctionAnalysisVisitor return true; } + bool VisitDeclRefExpr(const DeclRefExpr *DRE) { + // The use of a variable is trivial. + return true; + } + // Constant literal expressions are always trivial bool VisitIntegerLiteral(const IntegerLiteral *E) { return true; } bool VisitFloatingLiteral(const FloatingLiteral *E) { return true; } @@ -449,7 +470,7 @@ class TrivialFunctionAnalysisVisitor } private: - CacheTy Cache; + CacheTy &Cache; }; bool TrivialFunctionAnalysis::isTrivialImpl( @@ -474,4 +495,17 @@ bool TrivialFunctionAnalysis::isTrivialImpl( return Result; } +bool TrivialFunctionAnalysis::isTrivialImpl( + const Stmt *S, TrivialFunctionAnalysis::CacheTy &Cache) { + // If the statement isn't in the cache, conservatively assume that + // it's not trivial until analysis completes. Unlike a function case, + // we don't insert an entry into the cache until Visit returns + // since Visit* functions themselves make use of the cache. 
+ + TrivialFunctionAnalysisVisitor V(Cache); + bool Result = V.Visit(S); + assert(Cache.contains(S) && "Top-level statement not properly cached!"); + return Result; +} + } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index e07cd31395747d..9ed8e7cab6abb9 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -11,6 +11,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include namespace clang { @@ -19,6 +20,7 @@ class CXXMethodDecl; class CXXRecordDecl; class Decl; class FunctionDecl; +class Stmt; class Type; // Ref-countability of a type is implicitly defined by Ref and RefPtr @@ -71,14 +73,17 @@ class TrivialFunctionAnalysis { public: /// \returns true if \p D is a "trivial" function. bool isTrivial(const Decl *D) const { return isTrivialImpl(D, TheCache); } + bool isTrivial(const Stmt *S) const { return isTrivialImpl(S, TheCache); } private: friend class TrivialFunctionAnalysisVisitor; - using CacheTy = llvm::DenseMap; + using CacheTy = + llvm::DenseMap, bool>; mutable CacheTy TheCache{}; static bool isTrivialImpl(const Decl *D, CacheTy &Cache); + static bool isTrivialImpl(const Stmt *S, CacheTy &Cache); }; } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 5a72f53b12edaa..6036ad58cf253c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -26,28 +26,6 @@ using namespace ento; namespace { -// for ( int a = ...) ... true -// for ( int a : ...) ... true -// if ( int* a = ) ... true -// anything else ... 
false -bool isDeclaredInForOrIf(const VarDecl *Var) { - assert(Var); - auto &ASTCtx = Var->getASTContext(); - auto parent = ASTCtx.getParents(*Var); - - if (parent.size() == 1) { - if (auto *DS = parent.begin()->get()) { - DynTypedNodeList grandParent = ASTCtx.getParents(*DS); - if (grandParent.size() == 1) { - return grandParent.begin()->get() || - grandParent.begin()->get() || - grandParent.begin()->get(); - } - } - } - return false; -} - // FIXME: should be defined by anotations in the future bool isRefcountedStringsHack(const VarDecl *V) { assert(V); @@ -143,6 +121,11 @@ class UncountedLocalVarsChecker // want to visit those, so we make our own RecursiveASTVisitor. struct LocalVisitor : public RecursiveASTVisitor { const UncountedLocalVarsChecker *Checker; + + TrivialFunctionAnalysis TFA; + + using Base = RecursiveASTVisitor; + explicit LocalVisitor(const UncountedLocalVarsChecker *Checker) : Checker(Checker) { assert(Checker); @@ -155,6 +138,36 @@ class UncountedLocalVarsChecker Checker->visitVarDecl(V); return true; } + + bool TraverseIfStmt(IfStmt *IS) { + if (!TFA.isTrivial(IS)) + return Base::TraverseIfStmt(IS); + return true; + } + + bool TraverseForStmt(ForStmt *FS) { + if (!TFA.isTrivial(FS)) + return Base::TraverseForStmt(FS); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *FRS) { + if (!TFA.isTrivial(FRS)) + return Base::TraverseCXXForRangeStmt(FRS); + return true; + } + + bool TraverseWhileStmt(WhileStmt *WS) { + if (!TFA.isTrivial(WS)) + return Base::TraverseWhileStmt(WS); + return true; + } + + bool TraverseCompoundStmt(CompoundStmt *CS) { + if (!TFA.isTrivial(CS)) + return Base::TraverseCompoundStmt(CS); + return true; + } }; LocalVisitor visitor(this); @@ -189,18 +202,16 @@ class UncountedLocalVarsChecker dyn_cast_or_null(Ref->getFoundDecl())) { const auto *MaybeGuardianArgType = MaybeGuardian->getType().getTypePtr(); - if (!MaybeGuardianArgType) - return; - const CXXRecordDecl *const MaybeGuardianArgCXXRecord = - 
MaybeGuardianArgType->getAsCXXRecordDecl(); - if (!MaybeGuardianArgCXXRecord) - return; - - if (MaybeGuardian->isLocalVarDecl() && - (isRefCounted(MaybeGuardianArgCXXRecord) || - isRefcountedStringsHack(MaybeGuardian)) && - isGuardedScopeEmbeddedInGuardianScope(V, MaybeGuardian)) { - return; + if (MaybeGuardianArgType) { + const CXXRecordDecl *const MaybeGuardianArgCXXRecord = + MaybeGuardianArgType->getAsCXXRecordDecl(); + if (MaybeGuardianArgCXXRecord) { + if (MaybeGuardian->isLocalVarDecl() && + (isRefCounted(MaybeGuardianArgCXXRecord) || + isRefcountedStringsHack(MaybeGuardian)) && + isGuardedScopeEmbeddedInGuardianScope(V, MaybeGuardian)) + return; + } } // Parameters are guaranteed to be safe for the duration of the call @@ -219,9 +230,6 @@ class UncountedLocalVarsChecker if (!V->isLocalVarDecl()) return true; - if (isDeclaredInForOrIf(V)) - return true; - return false; } diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index e2b3401d407392..aab99197dfa49e 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -62,6 +62,8 @@ struct RefCountable { static Ref create(); void ref() {} void deref() {} + void method(); + int trivial() { return 123; } }; template T *downcast(T *t) { return t; } diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 0fcd3b21376caf..00673e91f471ea 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -2,6 +2,8 @@ #include "mock-types.h" +void someFunction(); + namespace raw_ptr { void foo() { RefCountable *bar; @@ -16,6 +18,13 @@ void foo_ref() { RefCountable automatic; RefCountable &bar = automatic; // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + someFunction(); + 
bar.method(); +} + +void foo_ref_trivial() { + RefCountable automatic; + RefCountable &bar = automatic; } void bar_ref(RefCountable &) {} @@ -32,6 +41,8 @@ void foo2() { // missing embedded scope here RefCountable *bar = foo.get(); // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + someFunction(); + bar->method(); } void foo3() { @@ -47,11 +58,35 @@ void foo4() { { RefCountable *bar = foo.get(); } } } + +void foo5() { + RefPtr foo; + auto* bar = foo.get(); + bar->trivial(); +} + +void foo6() { + RefPtr foo; + auto* bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + bar->method(); +} + +struct SelfReferencingStruct { + SelfReferencingStruct* ptr; + RefCountable* obj { nullptr }; +}; + +void foo7(RefCountable* obj) { + SelfReferencingStruct bar = { &bar, obj }; + bar.obj->method(); +} + } // namespace guardian_scopes namespace auto_keyword { class Foo { - RefCountable *provide_ref_ctnbl() { return nullptr; } + RefCountable *provide_ref_ctnbl(); void evil_func() { RefCountable *bar = provide_ref_ctnbl(); @@ -62,13 +97,24 @@ class Foo { // expected-warning@-1{{Local variable 'baz2' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} [[clang::suppress]] auto *baz_suppressed = provide_ref_ctnbl(); // no-warning } + + void func() { + RefCountable *bar = provide_ref_ctnbl(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + if (bar) + bar->method(); + } }; } // namespace auto_keyword namespace guardian_casts { void foo1() { RefPtr foo; - { RefCountable *bar = downcast(foo.get()); } + { + RefCountable *bar = downcast(foo.get()); + bar->method(); + } + foo->method(); } void foo2() { @@ -76,6 +122,7 @@ void foo2() { { RefCountable *bar = static_cast(downcast(foo.get())); + someFunction(); } } } // namespace guardian_casts @@ -83,7 +130,11 @@ void foo2() 
{ namespace guardian_ref_conversion_operator { void foo() { Ref rc; - { RefCountable &rr = rc; } + { + RefCountable &rr = rc; + rr.method(); + someFunction(); + } } } // namespace guardian_ref_conversion_operator @@ -92,9 +143,47 @@ RefCountable *provide_ref_ctnbl() { return nullptr; } void foo() { // no warnings - if (RefCountable *a = provide_ref_ctnbl()) { } - for (RefCountable *a = provide_ref_ctnbl(); a != nullptr;) { } + if (RefCountable *a = provide_ref_ctnbl()) + a->trivial(); + for (RefCountable *b = provide_ref_ctnbl(); b != nullptr;) + b->trivial(); RefCountable *array[1]; - for (RefCountable *a : array) { } + for (RefCountable *c : array) + c->trivial(); + while (RefCountable *d = provide_ref_ctnbl()) + d->trivial(); + do { + RefCountable *e = provide_ref_ctnbl(); + e->trivial(); + } while (1); + someFunction(); } + +void bar() { + if (RefCountable *a = provide_ref_ctnbl()) { + // expected-warning@-1{{Local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a->method(); + } + for (RefCountable *b = provide_ref_ctnbl(); b != nullptr;) { + // expected-warning@-1{{Local variable 'b' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + b->method(); + } + RefCountable *array[1]; + for (RefCountable *c : array) { + // expected-warning@-1{{Local variable 'c' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + c->method(); + } + + while (RefCountable *d = provide_ref_ctnbl()) { + // expected-warning@-1{{Local variable 'd' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + d->method(); + } + do { + RefCountable *e = provide_ref_ctnbl(); + // expected-warning@-1{{Local variable 'e' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + e->method(); + } while (1); + someFunction(); +} + } // namespace ignore_for_if From a6382de3999280ef7bf8bb63750686cdad889cd5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 7 Mar 2024 14:39:59 +0530 Subject: [PATCH 008/158] 
AMDGPU: Refactor mfma hazard handling [NFC] (#84276) Try to make this editable by using functions for the number of wait states as a function of the number of passes. I'm assuming the current hazard test coverage is comprehensive. This could probably use another round to further simplify it. Alternatively, I believe this could all be expressed in a constant table indexed by an instruction classify function and number of passes. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 334 ++++++++++-------- 1 file changed, 179 insertions(+), 155 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 7bed0d8ef0d670..e515b729e7d7e8 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2136,6 +2136,41 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { return WaitStatesNeeded; } +static int +GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { + // 2 pass -> 3 + // 4 pass -> 5 + // 8 pass -> 9 + // 16 pass -> 17 + return NumPasses + 1; +} + +static int +GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { + // 2 pass -> 2 + // 4 pass -> 4 + // 8 pass -> 8 + // 16 pass -> 16 + return NumPasses; +} + +static int +GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + +static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { int WaitStatesNeeded = 0; unsigned Opc = MI->getOpcode(); @@ -2164,13 +2199,6 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { for (const MachineOperand &Use : MI->explicit_uses()) { const int LegacyVALUNotDotWritesVGPRWaitStates = 2; const int 
SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2; - const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3; - const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5; - const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4; - const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9; - const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8; - const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17; - const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8; const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3; @@ -2181,14 +2209,6 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; - const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4; - const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6; - const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10; - const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18; - const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5; - const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7; - const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11; - const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; @@ -2250,42 +2270,40 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: - if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1)) 
+ int NumPasses = TSchedModel.computeInstrLatency(MI1); + if (ST.hasGFX940Insts()) { + if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) + break; + + NeedWaitStates = + isXDL(ST, *MI1) + ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + NumPasses) + : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + NumPasses); break; - switch (TSchedModel.computeInstrLatency(MI1)) { + } + + switch (NumPasses) { case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; - break; - case 4: - assert(ST.hasGFX940Insts()); - NeedWaitStates = isXDL(ST, *MI1) - ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = + isDGEMM(Opc) ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = + isDGEMM(Opc) + ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + break; + case 16: + NeedWaitStates = + isDGEMM(Opc) + ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; break; - case 16: [[fallthrough]]; default: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? 
SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; + llvm_unreachable("unexpected number of passes"); } } } @@ -2302,34 +2320,30 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates; break; default: - switch (TSchedModel.computeInstrLatency(MI1)) { + int NumPasses = TSchedModel.computeInstrLatency(MI1); + + if (ST.hasGFX940Insts()) { + NeedWaitStates = + isXDL(ST, *MI1) + ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( + NumPasses) + : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( + NumPasses); + break; + } + + switch (NumPasses) { case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates - : SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates; break; case 4: - assert(ST.hasGFX940Insts()); - NeedWaitStates = isXDL(ST, *MI1) - ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates; - break; + llvm_unreachable("unexpected number of passes for mfma"); case 8: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates - : SMFMA16x16WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates; break; case 16: [[fallthrough]]; default: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? 
GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates - : SMFMA32x32WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates; } } } @@ -2393,6 +2407,38 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { return WaitStatesNeeded; } +static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + +static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + +static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + +static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { if (!ST.hasGFX90AInsts()) return 0; @@ -2455,14 +2501,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; - const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4; - const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6; - const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10; - const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18; - const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5; - const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7; - const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11; - const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19; const int 
DMFMA4x4WriteVgprMemExpReadWaitStates = 9; const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; @@ -2516,47 +2554,44 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { continue; unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); + int NumPasses = HazardDefLatency; int NeedWaitStates = MaxWaitStates; - switch (HazardDefLatency) { - case 2: - NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates - : SMFMA4x4WriteVgprVALUMemExpReadWaitStates; - break; - case 4: - assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) - ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates - : DMFMA4x4WriteVgprVALUReadWaitStates - : isXDL(ST, *MFMA) - ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates; - break; - case 8: - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) - ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates - : DMFMA16x16WriteVgprVALUReadWaitStates - : ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates - : SMFMA16x16WriteVgprVALUMemExpReadWaitStates; - break; - case 16: [[fallthrough]]; - default: - assert(!isDGEMM(MFMA->getOpcode())); + + if (isDGEMM(MFMA->getOpcode())) { + switch (HazardDefLatency) { + case 4: + NeedWaitStates = IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates + : DMFMA4x4WriteVgprVALUReadWaitStates; + break; + case 8: + case 16: + NeedWaitStates = IsMemOrExport + ? DMFMA16x16WriteVgprMemExpReadWaitStates + : DMFMA16x16WriteVgprVALUReadWaitStates; + break; + default: + llvm_unreachable("unexpected dgemm"); + } + } else if (ST.hasGFX940Insts()) { NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? 
GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates - : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; - break; + isXDL(ST, *MFMA) + ? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses) + : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( + NumPasses); + } else { + switch (HazardDefLatency) { + case 2: + NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates; + break; + case 8: + NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates; + break; + case 16: + NeedWaitStates = SMFMA32x32WriteVgprVALUMemExpReadWaitStates; + break; + default: + llvm_unreachable("unexpected number of passes for mfma"); + } } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; @@ -2585,14 +2620,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; - const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4; - const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6; - const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10; - const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18; - const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5; - const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7; - const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11; - const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; @@ -2617,42 +2644,39 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (MFMA) { int NeedWaitStates = MaxWaitStates; - switch (TSchedModel.computeInstrLatency(MFMA)) { - case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? 
GFX940_XDL2PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates - : SMFMA4x4WriteVgprVALUWawWaitStates; - break; - case 4: - assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); - NeedWaitStates = isDGEMM(MFMA->getOpcode()) - ? DMFMA4x4WriteVgprVALUWriteWaitStates - : isXDL(ST, *MFMA) - ? GFX940_XDL4PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates; - break; - case 8: - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) ? DMFMA16x16WriteVgprVALUWriteWaitStates - : + int NumPasses = TSchedModel.computeInstrLatency(MFMA); - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) ? GFX940_XDL8PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates - : SMFMA16x16WriteVgprVALUWawWaitStates; - break; - case 16: [[fallthrough]]; - default: - assert(!isDGEMM(MFMA->getOpcode())); + if (isDGEMM(MFMA->getOpcode())) { + switch (NumPasses) { + case 4: + NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; + break; + case 8: + case 16: + NeedWaitStates = DMFMA16x16WriteVgprVALUWriteWaitStates; + break; + default: + llvm_unreachable("unexpected number of cycles for dgemm"); + } + } else if (ST.hasGFX940Insts()) { NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL16PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates - : SMFMA32x32WriteVgprVALUWawWaitStates; - break; + isXDL(ST, *MFMA) + ? 
GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses) + : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); + } else { + switch (NumPasses) { + case 2: + NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates; + break; + case 8: + NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates; + break; + case 16: + NeedWaitStates = SMFMA32x32WriteVgprVALUWawWaitStates; + break; + default: + llvm_unreachable("Unexpected number of passes for mfma"); + } } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; From 812c22b2ef5f3f194b8d452fc9f95714dce572f2 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Thu, 7 Mar 2024 09:28:20 +0000 Subject: [PATCH 009/158] [RemoveDIs][wasm] Apply current debug mode to new function protos (#84292) This trips the verifier changes added in #83251 Stimulated by llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll --- llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp index 90e81991284710..abcb1d0f16286e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp @@ -136,6 +136,7 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) { Function::Create(NewType, F.getLinkage(), F.getName() + ".fixed_sig"); NewF->setAttributes(F.getAttributes()); NewF->removeFnAttr("no-prototype"); + NewF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat; Replacements.emplace_back(&F, NewF); } From 503c55e17037436dcd45ac69dea8967e67e3f5e8 Mon Sep 17 00:00:00 2001 From: ostannard Date: Thu, 7 Mar 2024 09:28:49 +0000 Subject: [PATCH 010/158] [AArch64] Move SLS later in pass pipeline (#84210) Currently, the SLS hardening pass is run before the machine outliner, which means that the outliner creates new functions and calls which do not have the SLS hardening 
applied. The fix for this is to move the SLS passes to after the outliner, as has recently been done for the return address signing pass. This also avoids a bug where the SLS outliner emits code with instructions after a return, which the outliner doesn't correctly handle. --- .../Target/AArch64/AArch64SLSHardening.cpp | 15 +++++++++++- .../Target/AArch64/AArch64TargetMachine.cpp | 5 ++-- llvm/test/CodeGen/AArch64/O0-pipeline.ll | 4 ++-- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 4 ++-- .../AArch64/arm64-opt-remarks-lazy-bfi.ll | 24 +++++++++++++++---- llvm/test/CodeGen/AArch64/sls-crash.ll | 6 +++++ .../AArch64/sls-stackprotector-outliner.ll | 12 ++++++---- 7 files changed, 54 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sls-crash.ll diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp index ce3bc0b1837558..41bbc003fd9bf7 100644 --- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp @@ -220,7 +220,20 @@ void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - assert (MF.size() == 1); + + // Depending on whether this pass is in the same FunctionPassManager as the + // IR->MIR conversion, the thunk may be completely empty, or contain a single + // basic block with a single return instruction. Normalise it to contain a + // single empty basic block. 
+ if (MF.size() == 1) { + assert(MF.front().size() == 1); + assert(MF.front().front().getOpcode() == AArch64::RET); + MF.front().erase(MF.front().begin()); + } else { + assert(MF.size() == 0); + MF.push_back(MF.CreateMachineBasicBlock()); + } + MachineBasicBlock *Entry = &MF.front(); Entry->clear(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 64c4ecd1fd6d51..e5e60459e8148a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -820,9 +820,6 @@ void AArch64PassConfig::addPreSched2() { // info. addPass(createAArch64SpeculationHardeningPass()); - addPass(createAArch64IndirectThunks()); - addPass(createAArch64SLSHardeningPass()); - if (TM->getOptLevel() != CodeGenOptLevel::None) { if (EnableFalkorHWPFFix) addPass(createFalkorHWPFFixPass()); @@ -855,6 +852,8 @@ void AArch64PassConfig::addPreEmitPass() { } void AArch64PassConfig::addPostBBSections() { + addPass(createAArch64IndirectThunks()); + addPass(createAArch64SLSHardeningPass()); addPass(createAArch64PointerAuthPass()); if (EnableBranchTargets) addPass(createAArch64BranchTargetsPass()); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 4f87bb2a3ee811..d1e38b85fa9c36 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -64,8 +64,6 @@ ; CHECK-NEXT: AArch64 pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass -; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Insert fentry calls ; CHECK-NEXT: Insert XRay ops @@ -75,6 +73,8 @@ ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata +; CHECK-NEXT: AArch64 Indirect Thunks +; 
CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index ae0dbed09979b4..eee9a27c90c19e 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -205,8 +205,6 @@ ; CHECK-NEXT: AArch64 load / store optimization pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass -; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Falkor HW Prefetch Fix Late Phase @@ -227,6 +225,8 @@ ; CHECK-NEXT: Machine Sanitizer Binary Metadata ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: AArch64 Indirect Thunks +; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index 580886520789e3..3ffaf962425b38 100644 --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -32,8 +32,16 @@ ; HOTNESS: Freeing Pass 'Machine Outliner' ; HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' -; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... 
+; HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... ; HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... @@ -73,8 +81,16 @@ ; NO_HOTNESS: Freeing Pass 'Machine Outliner' ; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' -; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... 
+; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... diff --git a/llvm/test/CodeGen/AArch64/sls-crash.ll b/llvm/test/CodeGen/AArch64/sls-crash.ll new file mode 100644 index 00000000000000..5dfc3c7824a8b6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sls-crash.ll @@ -0,0 +1,6 @@ +; RUN: llc -mtriple aarch64 -O0 < %s + +define hidden void @foo() "target-features"="+harden-sls-blr" { +entry: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll index 5f3b1503b46b32..b281204a66e46a 100644 --- a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll +++ b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll @@ -18,7 +18,8 @@ define hidden void @_ZTv0_n24_N2C6D1Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -45,7 +46,8 @@ define hidden void @_ZTv0_n24_N2C6D0Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -71,7 +73,8 @@ define hidden void @_ZTv0_n24_N3C10D1Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte 
Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -97,7 +100,8 @@ define hidden void @_ZTv0_n24_N3C10D0Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB3_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb From 6e79f77adbbd338848ea770f2f2b110bc57a3990 Mon Sep 17 00:00:00 2001 From: Vincent Lee Date: Thu, 7 Mar 2024 01:46:36 -0800 Subject: [PATCH 011/158] [dataflow][nfc] Fix u8 string usage with c++20 (#84291) Clang returns an error when compiling this file with c++20 ``` error: ISO C++20 does not permit initialization of char array with UTF-8 string literal ``` It seems like c++20 treats u8strings differently than strings (probably needs char8_t). Make this a string to fix the error. 
--- clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp index ff4e18de2c70f1..d9f40d28859f5e 100644 --- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -500,7 +500,7 @@ class HTMLLogger : public Logger { for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { std::string Name = blockID(I); // Rightwards arrow, vertical line - char ConvergenceMarker[] = u8"\\n\u2192\u007c"; + char ConvergenceMarker[] = "\\n\u2192\u007c"; if (BlockConverged[I]) Name += ConvergenceMarker; GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" From 84f483dbeeba5ecadbf3e4a75bfb71525a3fa332 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 7 Mar 2024 11:01:09 +0100 Subject: [PATCH 012/158] [libc] Remove UB specializations of type traits for `BigInt` (#84035) The standard specifies that it it UB to specialize the following traits: - `std::is_integral` - `std::is_unsigned` - `std::make_unsigned` - `std::make_signed` This patch: - Removes specializations for `BigInt` - Transforms SFINAE for `bit.h` functions from template parameter to return type (This makes specialization easier). - Adds `BigInt` specialization for `bit.h` functions. - Fixes code depending on previous specializations. 
--- libc/src/__support/CMakeLists.txt | 1 + libc/src/__support/CPP/bit.h | 112 ++++++---- libc/src/__support/UInt.h | 192 ++++++++++++------ libc/src/__support/float_to_string.h | 2 +- libc/src/__support/integer_to_string.h | 19 +- libc/test/UnitTest/CMakeLists.txt | 1 + libc/test/UnitTest/LibcTest.cpp | 10 +- libc/test/UnitTest/LibcTest.h | 1 + libc/test/UnitTest/TestLogger.cpp | 8 +- libc/test/src/__support/CPP/bit_test.cpp | 49 +++-- .../llvm-project-overlay/libc/BUILD.bazel | 1 + .../libc/test/UnitTest/BUILD.bazel | 1 + 12 files changed, 272 insertions(+), 125 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 1a4b3e9a2145c0..17c04aa57e6fd6 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -95,6 +95,7 @@ add_header_library( HDRS integer_to_string.h DEPENDS + .uint libc.src.__support.common libc.src.__support.CPP.algorithm libc.src.__support.CPP.limits diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 7d11e7d5c497e0..bc2f595845a95f 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -27,13 +27,14 @@ namespace LIBC_NAMESPACE::cpp { // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. 
-template < - typename To, typename From, - typename = cpp::enable_if_t::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value, + To> +bit_cast(const From &from) { MSAN_UNPOISON(&from, sizeof(From)); #if LIBC_HAS_BUILTIN(__builtin_bit_cast) return __builtin_bit_cast(To, from); @@ -51,8 +52,10 @@ LIBC_INLINE constexpr To bit_cast(const From &from) { #endif // LIBC_HAS_BUILTIN(__builtin_bit_cast) } -template >> -[[nodiscard]] LIBC_INLINE constexpr bool has_single_bit(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, + bool> +has_single_bit(T value) { return (value != 0) && ((value & (value - 1)) == 0); } @@ -70,8 +73,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(T value) { if (!value) return cpp::numeric_limits::digits; if (value & 0x1) @@ -103,8 +107,9 @@ ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(T value) { if (!value) return cpp::numeric_limits::digits; // Bisection method. @@ -135,8 +140,9 @@ ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { return cpp::countl_zero(~value); } @@ -147,8 +153,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { return cpp::countr_zero(~value); } @@ -156,8 +163,9 @@ template >> /// Returns 0 otherwise. /// /// Ex. bit_width(5) == 3. -template >> -[[nodiscard]] LIBC_INLINE constexpr int bit_width(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { return cpp::numeric_limits::digits - cpp::countl_zero(value); } @@ -165,8 +173,9 @@ template >> /// nonzero. Returns 0 otherwise. /// /// Ex. bit_floor(5) == 4. -template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_floor(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_floor(T value) { if (!value) return 0; return T(1) << (cpp::bit_width(value) - 1); @@ -179,8 +188,9 @@ template >> /// /// The return value is undefined if the input is larger than the largest power /// of two representable in T. -template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_ceil(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_ceil(T value) { if (value < 2) return 1; return T(1) << cpp::bit_width(value - 1u); @@ -190,28 +200,31 @@ template >> // from https://blog.regehr.org/archives/1063. // Forward-declare rotr so that rotl can use it. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate); +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); -template >> -[[nodiscard]] LIBC_INLINE constexpr T rotl(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotr(value, -rotate); + return cpp::rotr(value, -rotate); return (value << rotate) | (value >> (N - rotate)); } -template -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotl(value, -rotate); + return cpp::rotl(value, -rotate); return (value >> rotate) | (value << (N - rotate)); } @@ -226,33 +239,44 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { } } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_leading_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_leading_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_one(T value) { return first_leading_zero(static_cast(~value)); } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(static_cast(~value)) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } /// Count number of 1's aka population count or hamming weight. /// /// Only unsigned integral types are allowed. -template >> -[[nodiscard]] LIBC_INLINE constexpr int count_ones(T value) { +// TODO: rename as 'popcount' to follow the standard +// https://en.cppreference.com/w/cpp/numeric/popcount +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +count_ones(T value) { int count = 0; for (int i = 0; i != cpp::numeric_limits::digits; ++i) if ((value >> i) & 0x1) @@ -272,8 +296,10 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) // TODO: 128b specializations? #undef ADD_SPECIALIZATION -template >> -[[nodiscard]] LIBC_INLINE constexpr int count_zeros(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +count_zeros(T value) { return count_ones(static_cast(~value)); } diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 5973e6fab1d7d5..b3d8f00b9a01a5 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -43,6 +43,9 @@ struct BigInt { static_assert(is_integral_v && is_unsigned_v, "WordType must be unsigned integer."); + using word_type = WordType; + LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; + LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = sizeof(WordType) * CHAR_BIT; @@ -50,6 +53,10 @@ struct BigInt { "Number of bits in BigInt should be a multiple of WORD_SIZE."); LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; + + using unsigned_type = BigInt; + using signed_type = BigInt; + cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; @@ -579,19 +586,33 @@ struct BigInt { return *this; } - LIBC_INLINE constexpr uint64_t clz() { - uint64_t leading_zeroes = 0; - for (size_t i = WORD_COUNT; i > 0; --i) { - if (val[i - 1] == 0) { - leading_zeroes += WORD_SIZE; - } else { - leading_zeroes += countl_zero(val[i - 1]); + // TODO: remove and use cpp::countl_zero below. + [[nodiscard]] LIBC_INLINE constexpr int clz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int leading_zeroes = 0; + for (auto i = val.size(); i > 0;) { + --i; + const int zeroes = countl_zero(val[i]); + leading_zeroes += zeroes; + if (zeroes != word_digits) break; - } } return leading_zeroes; } + // TODO: remove and use cpp::countr_zero below. 
+ [[nodiscard]] LIBC_INLINE constexpr int ctz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int trailing_zeroes = 0; + for (auto word : val) { + const int zeroes = countr_zero(word); + trailing_zeroes += zeroes; + if (zeroes != word_digits) + break; + } + return trailing_zeroes; + } + LIBC_INLINE constexpr void shift_left(size_t s) { if constexpr (Bits == WORD_SIZE) { // Use native types if possible. @@ -916,66 +937,123 @@ template <> class numeric_limits> { LIBC_INLINE_VAR static constexpr int digits = 128; }; -// Provides is_integral of U/Int<128>, U/Int<192>, U/Int<256>. -template -struct is_integral> : cpp::true_type {}; +// type traits to determine whether a T is a cpp::BigInt. +template struct is_big_int : cpp::false_type {}; -// Provides is_unsigned of UInt<128>, UInt<192>, UInt<256>. template -struct is_unsigned> : cpp::bool_constant {}; - -template -struct make_unsigned> - : type_identity> {}; - -template -struct make_signed> - : type_identity> {}; - -namespace internal { -template struct is_custom_uint : cpp::false_type {}; - -template -struct is_custom_uint> : cpp::true_type {}; -} // namespace internal - -// bit_cast to UInt -// Note: The standard scheme for SFINAE selection is to have exactly one -// function instanciation valid at a time. This is usually done by having a -// predicate in one function and the negated predicate in the other one. -// e.g. -// template::value == true> ... -// template::value == false> ... -// -// Unfortunately this would make the default 'cpp::bit_cast' aware of -// 'is_custom_uint' (or any other customization). To prevent exposing all -// customizations in the original function, we create a different function with -// four 'typename's instead of three - otherwise it would be considered as a -// redeclaration of the same function leading to "error: template parameter -// redefines default argument". 
-template ::value && - cpp::is_trivially_copyable::value>, - typename = cpp::enable_if_t::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +struct is_big_int> : cpp::true_type {}; + +template +LIBC_INLINE_VAR constexpr bool is_big_int_v = is_big_int::value; + +// Specialization of cpp::bit_cast ('bit.h') from T to BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value && is_big_int::value, + To> +bit_cast(const From &from) { To out; using Storage = decltype(out.val); out.val = cpp::bit_cast(from); return out; } -// bit_cast from UInt -template < - typename To, size_t Bits, - typename = cpp::enable_if_t) && - cpp::is_trivially_constructible::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable>::value>> -LIBC_INLINE constexpr To bit_cast(const UInt &from) { +// Specialization of cpp::bit_cast ('bit.h') from BigInt to T. +template +LIBC_INLINE constexpr cpp::enable_if_t< + sizeof(To) == sizeof(UInt) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable>::value, + To> +bit_cast(const UInt &from) { return cpp::bit_cast(from.val); } +// Specialization of cpp::has_single_bit ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, bool> +has_single_bit(T value) { + int bits = 0; + for (auto word : value.val) { + if (word == 0) + continue; + bits += count_ones(word); + if (bits > 1) + return false; + } + return bits == 1; +} + +// Specialization of cpp::countr_zero ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(const T &value) { + return value.ctz(); +} + +// Specialization of cpp::countl_zero ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(const T &value) { + return value.clz(); +} + +// Specialization of cpp::countl_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countl_zero(~value); +} + +// Specialization of cpp::countr_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countr_zero(~value); +} + +// Specialization of cpp::bit_width ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { + return cpp::numeric_limits::digits - cpp::countl_zero(value); +} + +// Forward-declare rotr so that rotl can use it. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); + +// Specialization of cpp::rotl ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotr(value, -rotate); + return (value << rotate) | (value >> (N - rotate)); +} + +// Specialization of cpp::rotr ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotl(value, -rotate); + return (value >> rotate) | (value << (N - rotate)); +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 744842ced8d772..27476433a94575 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -713,7 +713,7 @@ template <> class FloatToString { float_as_fixed.shift_left(SHIFT_AMOUNT); // If there are still digits above the decimal point, handle those. - if (float_as_fixed.clz() < EXTRA_INT_WIDTH) { + if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { cpp::UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 81ed21ccfca166..a5872dce652036 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -67,6 +67,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/UInt.h" // is_big_int #include "src/__support/common.h" namespace LIBC_NAMESPACE { @@ -149,6 +150,18 @@ template class StringBufferWriterImpl { using StringBufferWriter = StringBufferWriterImpl; using BackwardStringBufferWriter = StringBufferWriterImpl; +template struct IntegerWriterUnsigned {}; + +template +struct IntegerWriterUnsigned>> { + using type = cpp::make_unsigned_t; +}; + +template +struct IntegerWriterUnsigned>> { + using type = typename T::unsigned_type; +}; + } // namespace details namespace radix { @@ -163,7 +176,7 @@ template using Custom = details::Fmt; // See file header for documentation. 
template class IntegerToString { - static_assert(cpp::is_integral_v); + static_assert(cpp::is_integral_v || cpp::is_big_int_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { constexpr auto MAX_DIGITS = []() -> size_t { @@ -208,8 +221,8 @@ template class IntegerToString { // An internal stateless structure that handles the number formatting logic. struct IntegerWriter { - static_assert(cpp::is_integral_v); - using UNSIGNED_T = cpp::make_unsigned_t; + static_assert(cpp::is_integral_v || cpp::is_big_int_v); + using UNSIGNED_T = typename details::IntegerWriterUnsigned::type; LIBC_INLINE static char digit_char(uint8_t digit) { if (digit < 10) diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 4668f0061975f8..36837c553efce1 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -74,6 +74,7 @@ add_unittest_framework_library( libc.src.__support.CPP.type_traits libc.src.__support.fixed_point.fx_rep libc.src.__support.OSUtil.osutil + libc.src.__support.uint libc.src.__support.uint128 ) diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index 7b0e4fca83683b..0340f7ed37100e 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -38,7 +38,8 @@ TestLogger &operator<<(TestLogger &logger, Location Loc) { // When the value is UInt128, __uint128_t or wider, show its hexadecimal // digits. template -cpp::enable_if_t && (sizeof(T) > sizeof(uint64_t)), +cpp::enable_if_t<(cpp::is_integral_v && (sizeof(T) > sizeof(uint64_t))) || + cpp::is_big_int_v, cpp::string> describeValue(T Value) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); @@ -47,11 +48,10 @@ describeValue(T Value) { } // When the value is of a standard integral type, just display it as normal. 
-template -cpp::enable_if_t && - sizeof(ValType) <= sizeof(uint64_t), +template +cpp::enable_if_t && (sizeof(T) <= sizeof(uint64_t)), cpp::string> -describeValue(ValType Value) { +describeValue(T Value) { return cpp::to_string(Value); } diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index 639f6005832576..d26d6490bcb572 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -127,6 +127,7 @@ class Test { // of type promotion. template || + cpp::is_big_int_v || cpp::is_fixed_point_v, int> = 0> bool test(TestCond Cond, ValType LHS, ValType RHS, const char *LHSStr, diff --git a/libc/test/UnitTest/TestLogger.cpp b/libc/test/UnitTest/TestLogger.cpp index 6bb0e17dc3888e..469b3a11d57d9b 100644 --- a/libc/test/UnitTest/TestLogger.cpp +++ b/libc/test/UnitTest/TestLogger.cpp @@ -2,6 +2,7 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" #include "src/__support/OSUtil/io.h" // write_to_stderr +#include "src/__support/UInt.h" // is_big_int #include "src/__support/UInt128.h" #include @@ -47,8 +48,9 @@ template <> TestLogger &TestLogger::operator<<(void *addr) { } template TestLogger &TestLogger::operator<<(T t) { - if constexpr (cpp::is_integral_v && cpp::is_unsigned_v && - sizeof(T) > sizeof(uint64_t)) { + if constexpr (cpp::is_big_int_v || + (cpp::is_integral_v && cpp::is_unsigned_v && + (sizeof(T) > sizeof(uint64_t)))) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); const IntegerToString buffer(t); return *this << buffer.view(); @@ -68,7 +70,7 @@ template TestLogger &TestLogger::operator<< (unsigned short); template TestLogger &TestLogger::operator<< (unsigned int); template TestLogger &TestLogger::operator<< (unsigned long); template TestLogger & -TestLogger::operator<< (unsigned long long); + TestLogger::operator<< (unsigned long long); #ifdef __SIZEOF_INT128__ template TestLogger &TestLogger::operator<< <__uint128_t>(__uint128_t); diff --git 
a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 115a5d505c4b7a..1e3d895e6453a7 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -12,21 +12,44 @@ #include +#include + namespace LIBC_NAMESPACE::cpp { -using UnsignedTypes = - testing::TypeList; + +using UnsignedTypes = testing::TypeList< #if defined(__SIZEOF_INT128__) - __uint128_t, + __uint128_t, #endif - cpp::UInt<128>>; + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long, cpp::UInt<128>>; TYPED_TEST(LlvmLibcBitTest, HasSingleBit, UnsignedTypes) { - EXPECT_FALSE(has_single_bit(T(0))); - EXPECT_FALSE(has_single_bit(~T(0))); + constexpr auto ZERO = T(0); + constexpr auto ALL_ONES = T(~ZERO); + EXPECT_FALSE(has_single_bit(ZERO)); + EXPECT_FALSE(has_single_bit(ALL_ONES)); + for (T value = 1; value; value <<= 1) EXPECT_TRUE(has_single_bit(value)); + + // We test that if two bits are set has_single_bit returns false. + // We do this by setting the highest or lowest bit depending or where the + // current bit is. This is a bit convoluted but it helps catch a bug on BigInt + // where we have to work on an element-by-element basis. + constexpr auto MIDPOINT = T(ALL_ONES / 2); + constexpr auto LSB = T(1); + constexpr auto MSB = T(~(ALL_ONES >> 1)); + for (T value = 1; value; value <<= 1) { + auto two_bits_value = value | ((value <= MIDPOINT) ? 
MSB : LSB); + EXPECT_FALSE(has_single_bit(two_bits_value)); + } } TYPED_TEST(LlvmLibcBitTest, CountLZero, UnsignedTypes) { @@ -206,39 +229,39 @@ TEST(LlvmLibcBitTest, Rotr) { rotr(0x12345678deadbeefULL, -19)); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_zero(~(T(1) << i)), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_one(static_cast(0)), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_one(T(1) << i), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_one(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); } -TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits::digits); for (int i = 0; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } -TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) { EXPECT_EQ(count_ones(T(0)), 0); for (int i = 0; i != cpp::numeric_limits::digits; ++i) 
EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 49a454379e1c7a..5c6cf761ebe7de 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -507,6 +507,7 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", + ":__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index a5c18fbb68b398..44692947af7c08 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -18,6 +18,7 @@ libc_support_library( "//libc:__support_cpp_string", "//libc:__support_cpp_string_view", "//libc:__support_osutil_io", + "//libc:__support_uint", "//libc:__support_uint128", ], ) From 27844cb2fa2a66fe90f12240809b260709fb2cc9 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 7 Mar 2024 11:06:50 +0100 Subject: [PATCH 013/158] Revert "[libc] Remove UB specializations of type traits for `BigInt`" (#84297) Reverts llvm/llvm-project#84035 Several bots are failing: - https://lab.llvm.org/buildbot/#/builders/223/builds/37522 - https://lab.llvm.org/buildbot/#/builders/162/builds/51978 - https://lab.llvm.org/buildbot/#/builders/163/builds/52560 - https://lab.llvm.org/buildbot/#/builders/250/builds/19619 --- libc/src/__support/CMakeLists.txt | 1 - libc/src/__support/CPP/bit.h | 112 ++++------ libc/src/__support/UInt.h | 192 ++++++------------ libc/src/__support/float_to_string.h | 2 +- libc/src/__support/integer_to_string.h | 19 +- libc/test/UnitTest/CMakeLists.txt | 1 - libc/test/UnitTest/LibcTest.cpp | 10 +- libc/test/UnitTest/LibcTest.h | 1 - libc/test/UnitTest/TestLogger.cpp | 8 +- libc/test/src/__support/CPP/bit_test.cpp | 49 ++--- 
.../llvm-project-overlay/libc/BUILD.bazel | 1 - .../libc/test/UnitTest/BUILD.bazel | 1 - 12 files changed, 125 insertions(+), 272 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 17c04aa57e6fd6..1a4b3e9a2145c0 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -95,7 +95,6 @@ add_header_library( HDRS integer_to_string.h DEPENDS - .uint libc.src.__support.common libc.src.__support.CPP.algorithm libc.src.__support.CPP.limits diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index bc2f595845a95f..7d11e7d5c497e0 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -27,14 +27,13 @@ namespace LIBC_NAMESPACE::cpp { // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. -template -LIBC_INLINE constexpr cpp::enable_if_t< - (sizeof(To) == sizeof(From)) && - cpp::is_trivially_constructible::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable::value, - To> -bit_cast(const From &from) { +template < + typename To, typename From, + typename = cpp::enable_if_t::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value>> +LIBC_INLINE constexpr To bit_cast(const From &from) { MSAN_UNPOISON(&from, sizeof(From)); #if LIBC_HAS_BUILTIN(__builtin_bit_cast) return __builtin_bit_cast(To, from); @@ -52,10 +51,8 @@ bit_cast(const From &from) { #endif // LIBC_HAS_BUILTIN(__builtin_bit_cast) } -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, - bool> -has_single_bit(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr bool has_single_bit(T value) { return (value != 0) && ((value & (value - 1)) == 0); } @@ -73,9 +70,8 @@ has_single_bit(T value) { /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countr_zero(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int countr_zero(T value) { if (!value) return cpp::numeric_limits::digits; if (value & 0x1) @@ -107,9 +103,8 @@ ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countl_zero(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int countl_zero(T value) { if (!value) return cpp::numeric_limits::digits; // Bisection method. @@ -140,9 +135,8 @@ ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countl_one(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int countl_one(T value) { return cpp::countl_zero(~value); } @@ -153,9 +147,8 @@ countl_one(T value) { /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countr_one(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int countr_one(T value) { return cpp::countr_zero(~value); } @@ -163,9 +156,8 @@ countr_one(T value) { /// Returns 0 otherwise. /// /// Ex. bit_width(5) == 3. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -bit_width(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int bit_width(T value) { return cpp::numeric_limits::digits - cpp::countl_zero(value); } @@ -173,9 +165,8 @@ bit_width(T value) { /// nonzero. Returns 0 otherwise. /// /// Ex. bit_floor(5) == 4. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -bit_floor(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr T bit_floor(T value) { if (!value) return 0; return T(1) << (cpp::bit_width(value) - 1); @@ -188,9 +179,8 @@ bit_floor(T value) { /// /// The return value is undefined if the input is larger than the largest power /// of two representable in T. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -bit_ceil(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr T bit_ceil(T value) { if (value < 2) return 1; return T(1) << cpp::bit_width(value - 1u); @@ -200,31 +190,28 @@ bit_ceil(T value) { // from https://blog.regehr.org/archives/1063. // Forward-declare rotr so that rotl can use it. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotr(T value, int rotate); +template >> +[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate); -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotl(T value, int rotate) { +template >> +[[nodiscard]] LIBC_INLINE constexpr T rotl(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotr(value, -rotate); + return cpp::rotr(value, -rotate); return (value << rotate) | (value >> (N - rotate)); } -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotr(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotl(value, -rotate); + return cpp::rotl(value, -rotate); return (value >> rotate) | (value << (N - rotate)); } @@ -239,44 +226,33 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { } } -// TODO: remove from 'bit.h' as it is not a standard function. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -first_leading_zero(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } -// TODO: remove from 'bit.h' as it is not a standard function. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -first_leading_one(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_leading_one(T value) { return first_leading_zero(static_cast(~value)); } -// TODO: remove from 'bit.h' as it is not a standard function. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -first_trailing_zero(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(static_cast(~value)) + 1; } -// TODO: remove from 'bit.h' as it is not a standard function. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -first_trailing_one(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } /// Count number of 1's aka population count or hamming weight. /// /// Only unsigned integral types are allowed. -// TODO: rename as 'popcount' to follow the standard -// https://en.cppreference.com/w/cpp/numeric/popcount -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -count_ones(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int count_ones(T value) { int count = 0; for (int i = 0; i != cpp::numeric_limits::digits; ++i) if ((value >> i) & 0x1) @@ -296,10 +272,8 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) // TODO: 128b specializations? #undef ADD_SPECIALIZATION -// TODO: remove from 'bit.h' as it is not a standard function. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -count_zeros(T value) { +template >> +[[nodiscard]] LIBC_INLINE constexpr int count_zeros(T value) { return count_ones(static_cast(~value)); } diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index b3d8f00b9a01a5..5973e6fab1d7d5 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -43,9 +43,6 @@ struct BigInt { static_assert(is_integral_v && is_unsigned_v, "WordType must be unsigned integer."); - using word_type = WordType; - LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; - LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = sizeof(WordType) * CHAR_BIT; @@ -53,10 +50,6 @@ struct BigInt { "Number of bits in BigInt should be a multiple of WORD_SIZE."); LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; - - using unsigned_type = BigInt; - using signed_type = BigInt; - cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; @@ -586,33 +579,19 @@ struct BigInt { return *this; } - // TODO: remove and use cpp::countl_zero below. - [[nodiscard]] LIBC_INLINE constexpr int clz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int leading_zeroes = 0; - for (auto i = val.size(); i > 0;) { - --i; - const int zeroes = countl_zero(val[i]); - leading_zeroes += zeroes; - if (zeroes != word_digits) + LIBC_INLINE constexpr uint64_t clz() { + uint64_t leading_zeroes = 0; + for (size_t i = WORD_COUNT; i > 0; --i) { + if (val[i - 1] == 0) { + leading_zeroes += WORD_SIZE; + } else { + leading_zeroes += countl_zero(val[i - 1]); break; + } } return leading_zeroes; } - // TODO: remove and use cpp::countr_zero below. 
- [[nodiscard]] LIBC_INLINE constexpr int ctz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int trailing_zeroes = 0; - for (auto word : val) { - const int zeroes = countr_zero(word); - trailing_zeroes += zeroes; - if (zeroes != word_digits) - break; - } - return trailing_zeroes; - } - LIBC_INLINE constexpr void shift_left(size_t s) { if constexpr (Bits == WORD_SIZE) { // Use native types if possible. @@ -937,121 +916,64 @@ template <> class numeric_limits> { LIBC_INLINE_VAR static constexpr int digits = 128; }; -// type traits to determine whether a T is a cpp::BigInt. -template struct is_big_int : cpp::false_type {}; - +// Provides is_integral of U/Int<128>, U/Int<192>, U/Int<256>. template -struct is_big_int> : cpp::true_type {}; - -template -LIBC_INLINE_VAR constexpr bool is_big_int_v = is_big_int::value; - -// Specialization of cpp::bit_cast ('bit.h') from T to BigInt. -template -LIBC_INLINE constexpr cpp::enable_if_t< - (sizeof(To) == sizeof(From)) && cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable::value && is_big_int::value, - To> -bit_cast(const From &from) { - To out; - using Storage = decltype(out.val); - out.val = cpp::bit_cast(from); - return out; -} +struct is_integral> : cpp::true_type {}; -// Specialization of cpp::bit_cast ('bit.h') from BigInt to T. -template -LIBC_INLINE constexpr cpp::enable_if_t< - sizeof(To) == sizeof(UInt) && - cpp::is_trivially_constructible::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable>::value, - To> -bit_cast(const UInt &from) { - return cpp::bit_cast(from.val); -} - -// Specialization of cpp::has_single_bit ('bit.h') for BigInt. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, bool> -has_single_bit(T value) { - int bits = 0; - for (auto word : value.val) { - if (word == 0) - continue; - bits += count_ones(word); - if (bits > 1) - return false; - } - return bits == 1; -} - -// Specialization of cpp::countr_zero ('bit.h') for BigInt. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countr_zero(const T &value) { - return value.ctz(); -} +// Provides is_unsigned of UInt<128>, UInt<192>, UInt<256>. +template +struct is_unsigned> : cpp::bool_constant {}; -// Specialization of cpp::countl_zero ('bit.h') for BigInt. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countl_zero(const T &value) { - return value.clz(); -} +template +struct make_unsigned> + : type_identity> {}; -// Specialization of cpp::countl_one ('bit.h') for BigInt. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countl_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countl_zero(~value); -} +template +struct make_signed> + : type_identity> {}; -// Specialization of cpp::countr_one ('bit.h') for BigInt. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -countr_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countr_zero(~value); -} +namespace internal { +template struct is_custom_uint : cpp::false_type {}; -// Specialization of cpp::bit_width ('bit.h') for BigInt. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -bit_width(T value) { - return cpp::numeric_limits::digits - cpp::countl_zero(value); -} +template +struct is_custom_uint> : cpp::true_type {}; +} // namespace internal -// Forward-declare rotr so that rotl can use it. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotr(T value, int rotate); - -// Specialization of cpp::rotl ('bit.h') for BigInt. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotl(T value, int rotate) { - constexpr unsigned N = cpp::numeric_limits::digits; - rotate = rotate % N; - if (!rotate) - return value; - if (rotate < 0) - return cpp::rotr(value, -rotate); - return (value << rotate) | (value >> (N - rotate)); +// bit_cast to UInt +// Note: The standard scheme for SFINAE selection is to have exactly one +// function instanciation valid at a time. This is usually done by having a +// predicate in one function and the negated predicate in the other one. +// e.g. +// template::value == true> ... +// template::value == false> ... +// +// Unfortunately this would make the default 'cpp::bit_cast' aware of +// 'is_custom_uint' (or any other customization). To prevent exposing all +// customizations in the original function, we create a different function with +// four 'typename's instead of three - otherwise it would be considered as a +// redeclaration of the same function leading to "error: template parameter +// redefines default argument". +template ::value && + cpp::is_trivially_copyable::value>, + typename = cpp::enable_if_t::value>> +LIBC_INLINE constexpr To bit_cast(const From &from) { + To out; + using Storage = decltype(out.val); + out.val = cpp::bit_cast(from); + return out; } -// Specialization of cpp::rotr ('bit.h') for BigInt. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -rotr(T value, int rotate) { - constexpr unsigned N = cpp::numeric_limits::digits; - rotate = rotate % N; - if (!rotate) - return value; - if (rotate < 0) - return cpp::rotl(value, -rotate); - return (value >> rotate) | (value << (N - rotate)); +// bit_cast from UInt +template < + typename To, size_t Bits, + typename = cpp::enable_if_t) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable>::value>> +LIBC_INLINE constexpr To bit_cast(const UInt &from) { + return cpp::bit_cast(from.val); } } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 27476433a94575..744842ced8d772 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -713,7 +713,7 @@ template <> class FloatToString { float_as_fixed.shift_left(SHIFT_AMOUNT); // If there are still digits above the decimal point, handle those. 
- if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { + if (float_as_fixed.clz() < EXTRA_INT_WIDTH) { cpp::UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index a5872dce652036..81ed21ccfca166 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -67,7 +67,6 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt.h" // is_big_int #include "src/__support/common.h" namespace LIBC_NAMESPACE { @@ -150,18 +149,6 @@ template class StringBufferWriterImpl { using StringBufferWriter = StringBufferWriterImpl; using BackwardStringBufferWriter = StringBufferWriterImpl; -template struct IntegerWriterUnsigned {}; - -template -struct IntegerWriterUnsigned>> { - using type = cpp::make_unsigned_t; -}; - -template -struct IntegerWriterUnsigned>> { - using type = typename T::unsigned_type; -}; - } // namespace details namespace radix { @@ -176,7 +163,7 @@ template using Custom = details::Fmt; // See file header for documentation. template class IntegerToString { - static_assert(cpp::is_integral_v || cpp::is_big_int_v); + static_assert(cpp::is_integral_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { constexpr auto MAX_DIGITS = []() -> size_t { @@ -221,8 +208,8 @@ template class IntegerToString { // An internal stateless structure that handles the number formatting logic. 
struct IntegerWriter { - static_assert(cpp::is_integral_v || cpp::is_big_int_v); - using UNSIGNED_T = typename details::IntegerWriterUnsigned::type; + static_assert(cpp::is_integral_v); + using UNSIGNED_T = cpp::make_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { if (digit < 10) diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 36837c553efce1..4668f0061975f8 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -74,7 +74,6 @@ add_unittest_framework_library( libc.src.__support.CPP.type_traits libc.src.__support.fixed_point.fx_rep libc.src.__support.OSUtil.osutil - libc.src.__support.uint libc.src.__support.uint128 ) diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index 0340f7ed37100e..7b0e4fca83683b 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -38,8 +38,7 @@ TestLogger &operator<<(TestLogger &logger, Location Loc) { // When the value is UInt128, __uint128_t or wider, show its hexadecimal // digits. template -cpp::enable_if_t<(cpp::is_integral_v && (sizeof(T) > sizeof(uint64_t))) || - cpp::is_big_int_v, +cpp::enable_if_t && (sizeof(T) > sizeof(uint64_t)), cpp::string> describeValue(T Value) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); @@ -48,10 +47,11 @@ describeValue(T Value) { } // When the value is of a standard integral type, just display it as normal. -template -cpp::enable_if_t && (sizeof(T) <= sizeof(uint64_t)), +template +cpp::enable_if_t && + sizeof(ValType) <= sizeof(uint64_t), cpp::string> -describeValue(T Value) { +describeValue(ValType Value) { return cpp::to_string(Value); } diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index d26d6490bcb572..639f6005832576 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -127,7 +127,6 @@ class Test { // of type promotion. 
template || - cpp::is_big_int_v || cpp::is_fixed_point_v, int> = 0> bool test(TestCond Cond, ValType LHS, ValType RHS, const char *LHSStr, diff --git a/libc/test/UnitTest/TestLogger.cpp b/libc/test/UnitTest/TestLogger.cpp index 469b3a11d57d9b..6bb0e17dc3888e 100644 --- a/libc/test/UnitTest/TestLogger.cpp +++ b/libc/test/UnitTest/TestLogger.cpp @@ -2,7 +2,6 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" #include "src/__support/OSUtil/io.h" // write_to_stderr -#include "src/__support/UInt.h" // is_big_int #include "src/__support/UInt128.h" #include @@ -48,9 +47,8 @@ template <> TestLogger &TestLogger::operator<<(void *addr) { } template TestLogger &TestLogger::operator<<(T t) { - if constexpr (cpp::is_big_int_v || - (cpp::is_integral_v && cpp::is_unsigned_v && - (sizeof(T) > sizeof(uint64_t)))) { + if constexpr (cpp::is_integral_v && cpp::is_unsigned_v && + sizeof(T) > sizeof(uint64_t)) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); const IntegerToString buffer(t); return *this << buffer.view(); @@ -70,7 +68,7 @@ template TestLogger &TestLogger::operator<< (unsigned short); template TestLogger &TestLogger::operator<< (unsigned int); template TestLogger &TestLogger::operator<< (unsigned long); template TestLogger & - TestLogger::operator<< (unsigned long long); +TestLogger::operator<< (unsigned long long); #ifdef __SIZEOF_INT128__ template TestLogger &TestLogger::operator<< <__uint128_t>(__uint128_t); diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 1e3d895e6453a7..115a5d505c4b7a 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -12,44 +12,21 @@ #include -#include - namespace LIBC_NAMESPACE::cpp { -using UnsignedTypesNoBigInt = testing::TypeList< -#if defined(__SIZEOF_INT128__) - __uint128_t, -#endif - unsigned char, unsigned short, unsigned int, unsigned long, - unsigned long long>; - -using UnsignedTypes = 
testing::TypeList< +using UnsignedTypes = + testing::TypeList>; + cpp::UInt<128>>; TYPED_TEST(LlvmLibcBitTest, HasSingleBit, UnsignedTypes) { - constexpr auto ZERO = T(0); - constexpr auto ALL_ONES = T(~ZERO); - EXPECT_FALSE(has_single_bit(ZERO)); - EXPECT_FALSE(has_single_bit(ALL_ONES)); - + EXPECT_FALSE(has_single_bit(T(0))); + EXPECT_FALSE(has_single_bit(~T(0))); for (T value = 1; value; value <<= 1) EXPECT_TRUE(has_single_bit(value)); - - // We test that if two bits are set has_single_bit returns false. - // We do this by setting the highest or lowest bit depending or where the - // current bit is. This is a bit convoluted but it helps catch a bug on BigInt - // where we have to work on an element-by-element basis. - constexpr auto MIDPOINT = T(ALL_ONES / 2); - constexpr auto LSB = T(1); - constexpr auto MSB = T(~(ALL_ONES >> 1)); - for (T value = 1; value; value <<= 1) { - auto two_bits_value = value | ((value <= MIDPOINT) ? MSB : LSB); - EXPECT_FALSE(has_single_bit(two_bits_value)); - } } TYPED_TEST(LlvmLibcBitTest, CountLZero, UnsignedTypes) { @@ -229,39 +206,39 @@ TEST(LlvmLibcBitTest, Rotr) { rotr(0x12345678deadbeefULL, -19)); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypes) { EXPECT_EQ(first_leading_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_zero(~(T(1) << i)), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { EXPECT_EQ(first_leading_one(static_cast(0)), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_one(T(1) << i), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { 
EXPECT_EQ(first_trailing_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { EXPECT_EQ(first_trailing_one(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); } -TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypes) { EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits::digits); for (int i = 0; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } -TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { EXPECT_EQ(count_ones(T(0)), 0); for (int i = 0; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5c6cf761ebe7de..49a454379e1c7a 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -507,7 +507,6 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", - ":__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index 44692947af7c08..a5c18fbb68b398 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -18,7 +18,6 @@ libc_support_library( "//libc:__support_cpp_string", "//libc:__support_cpp_string_view", "//libc:__support_osutil_io", - "//libc:__support_uint", "//libc:__support_uint128", ], ) From 
bf7f62ab92241298ccd7af008b3b26daac9c220b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Mar 2024 10:03:49 +0000 Subject: [PATCH 014/158] [AMDGPU] Make use of Mnem_gfx11_gfx12. NFC. --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index b984126d844722..9c6934865bfa55 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -2902,7 +2902,7 @@ multiclass MTBUF_Real_AllAddr_gfx11_gfx12 op> multiclass MTBUF_Real_AllAddr_gfx11_gfx12_Renamed op, string real_name> : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl { defvar ps = get_BUF_ps; - def : MnemonicAlias, Requires<[isGFX11Plus]>; + def : Mnem_gfx11_gfx12; } defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x008, "tbuffer_load_d16_format_x">; From 469c5e3da46115b9625e2d4771bd19d4968e3fa9 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Mar 2024 10:21:07 +0000 Subject: [PATCH 015/158] [AMDGPU] Simplify definition of renamed DS instructions. NFC. Following the pattern used for SOP instructions, we can use the same multiclass with a default argument to define renamed and non-renamed instructions. --- llvm/lib/Target/AMDGPU/DSInstructions.td | 149 ++++++++++------------- 1 file changed, 66 insertions(+), 83 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 5b94102770cd56..a84227ebf506fe 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1210,33 +1210,24 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op, DS_Pseudo ps, int ef, // GFX12. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in { - multiclass DS_Real_gfx12 op> { - defvar ps = !cast(NAME); +multiclass DS_Real_gfx12 op, string name = !tolower(NAME)> { + defvar ps = !cast(NAME); + let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in def _gfx12 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12; - } - - multiclass DS_Real_Renamed_gfx12 op, string name> { - defvar ps = !cast(NAME); - def _gfx12 : - Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12, - MnemonicAlias, - Requires<[isGFX12Plus]>; - } -} // End AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" - -defm DS_MIN_F32 : DS_Real_Renamed_gfx12<0x012, "ds_min_num_f32">; -defm DS_MAX_F32 : DS_Real_Renamed_gfx12<0x013, "ds_max_num_f32">; -defm DS_MIN_RTN_F32 : DS_Real_Renamed_gfx12<0x032, "ds_min_num_rtn_f32">; -defm DS_MAX_RTN_F32 : DS_Real_Renamed_gfx12<0x033, "ds_max_num_rtn_f32">; -defm DS_MIN_F64 : DS_Real_Renamed_gfx12<0x052, "ds_min_num_f64">; -defm DS_MAX_F64 : DS_Real_Renamed_gfx12<0x053, "ds_max_num_f64">; -defm DS_MIN_RTN_F64 : DS_Real_Renamed_gfx12<0x072, "ds_min_num_rtn_f64">; -defm DS_MAX_RTN_F64 : DS_Real_Renamed_gfx12<0x073, "ds_max_num_rtn_f64">; + name, /*hasGDS=*/false>; + if !ne(ps.Mnemonic, name) then + def : MnemonicAlias, Requires<[isGFX12Plus]>; +} + +defm DS_MIN_F32 : DS_Real_gfx12<0x012, "ds_min_num_f32">; +defm DS_MAX_F32 : DS_Real_gfx12<0x013, "ds_max_num_f32">; +defm DS_MIN_RTN_F32 : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">; +defm DS_MAX_RTN_F32 : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">; +defm DS_MIN_F64 : DS_Real_gfx12<0x052, "ds_min_num_f64">; +defm DS_MAX_F64 : DS_Real_gfx12<0x053, "ds_max_num_f64">; +defm DS_MIN_RTN_F64 : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">; +defm DS_MAX_RTN_F64 : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">; defm DS_COND_SUB_U32 : DS_Real_gfx12<0x098>; defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>; defm DS_COND_SUB_RTN_U32 : 
DS_Real_gfx12<0x0a8>; @@ -1256,65 +1247,57 @@ def : MnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">, Requires<[isGFX12Pl // GFX11. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { - multiclass DS_Real_gfx11 op> { +multiclass DS_Real_gfx11 op, string name = !tolower(NAME)> { + defvar ps = !cast(NAME); + let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in def _gfx11 : - Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12(NAME), - SIEncodingFamily.GFX11>; - } - - multiclass DS_Real_Renamed_gfx11 op, string name> { - defvar ps = !cast(NAME); - def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12, - MnemonicAlias, Requires<[isGFX11Only]>; - } -} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" - -multiclass DS_Real_gfx11_gfx12 op> - : DS_Real_gfx11, DS_Real_gfx12; - -multiclass DS_Real_Renamed_gfx11_gfx12 op, string name> - : DS_Real_Renamed_gfx11, - DS_Real_Renamed_gfx12; - -defm DS_WRITE_B32 : DS_Real_Renamed_gfx11_gfx12<0x00d, "ds_store_b32">; -defm DS_WRITE2_B32 : DS_Real_Renamed_gfx11_gfx12<0x00e, "ds_store_2addr_b32">; -defm DS_WRITE2ST64_B32 : DS_Real_Renamed_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">; -defm DS_WRITE_B8 : DS_Real_Renamed_gfx11_gfx12<0x01e, "ds_store_b8">; -defm DS_WRITE_B16 : DS_Real_Renamed_gfx11_gfx12<0x01f, "ds_store_b16">; -defm DS_WRXCHG_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">; -defm DS_WRXCHG2_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">; -defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">; -defm DS_READ_B32 : DS_Real_Renamed_gfx11_gfx12<0x036, "ds_load_b32">; -defm DS_READ2_B32 : DS_Real_Renamed_gfx11_gfx12<0x037, "ds_load_2addr_b32">; -defm DS_READ2ST64_B32 : DS_Real_Renamed_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">; -defm DS_READ_I8 : DS_Real_Renamed_gfx11_gfx12<0x039, 
"ds_load_i8">; -defm DS_READ_U8 : DS_Real_Renamed_gfx11_gfx12<0x03a, "ds_load_u8">; -defm DS_READ_I16 : DS_Real_Renamed_gfx11_gfx12<0x03b, "ds_load_i16">; -defm DS_READ_U16 : DS_Real_Renamed_gfx11_gfx12<0x03c, "ds_load_u16">; -defm DS_WRITE_B64 : DS_Real_Renamed_gfx11_gfx12<0x04d, "ds_store_b64">; -defm DS_WRITE2_B64 : DS_Real_Renamed_gfx11_gfx12<0x04e, "ds_store_2addr_b64">; -defm DS_WRITE2ST64_B64 : DS_Real_Renamed_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">; -defm DS_WRXCHG_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">; -defm DS_WRXCHG2_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">; -defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">; -defm DS_READ_B64 : DS_Real_Renamed_gfx11_gfx12<0x076, "ds_load_b64">; -defm DS_READ2_B64 : DS_Real_Renamed_gfx11_gfx12<0x077, "ds_load_2addr_b64">; -defm DS_READ2ST64_B64 : DS_Real_Renamed_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">; -defm DS_WRITE_B8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">; -defm DS_WRITE_B16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">; -defm DS_READ_U8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a2, "ds_load_u8_d16">; -defm DS_READ_U8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">; -defm DS_READ_I8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a4, "ds_load_i8_d16">; -defm DS_READ_I8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">; -defm DS_READ_U16_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a6, "ds_load_u16_d16">; -defm DS_READ_U16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">; -defm DS_WRITE_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">; -defm DS_READ_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">; -defm DS_WRITE_B96 : DS_Real_Renamed_gfx11_gfx12<0x0de, "ds_store_b96">; -defm DS_WRITE_B128 : DS_Real_Renamed_gfx11_gfx12<0x0df, "ds_store_b128">; -defm DS_READ_B96 : 
DS_Real_Renamed_gfx11_gfx12<0x0fe, "ds_load_b96">; -defm DS_READ_B128 : DS_Real_Renamed_gfx11_gfx12<0x0ff, "ds_load_b128">; + Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12; + if !ne(ps.Mnemonic, name) then + def : MnemonicAlias, Requires<[isGFX11Only]>; +} + +multiclass DS_Real_gfx11_gfx12 op, string name = !tolower(NAME)> + : DS_Real_gfx11, DS_Real_gfx12; + +defm DS_WRITE_B32 : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">; +defm DS_WRITE2_B32 : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">; +defm DS_WRITE2ST64_B32 : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">; +defm DS_WRITE_B8 : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">; +defm DS_WRITE_B16 : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">; +defm DS_WRXCHG_RTN_B32 : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">; +defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">; +defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">; +defm DS_READ_B32 : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">; +defm DS_READ2_B32 : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">; +defm DS_READ2ST64_B32 : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">; +defm DS_READ_I8 : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">; +defm DS_READ_U8 : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">; +defm DS_READ_I16 : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">; +defm DS_READ_U16 : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">; +defm DS_WRITE_B64 : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">; +defm DS_WRITE2_B64 : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">; +defm DS_WRITE2ST64_B64 : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">; +defm DS_WRXCHG_RTN_B64 : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">; +defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">; +defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">; +defm DS_READ_B64 : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">; +defm DS_READ2_B64 : 
DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">; +defm DS_READ2ST64_B64 : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">; +defm DS_WRITE_B8_D16_HI : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">; +defm DS_WRITE_B16_D16_HI : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">; +defm DS_READ_U8_D16 : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">; +defm DS_READ_U8_D16_HI : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">; +defm DS_READ_I8_D16 : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">; +defm DS_READ_I8_D16_HI : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">; +defm DS_READ_U16_D16 : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">; +defm DS_READ_U16_D16_HI : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">; +defm DS_WRITE_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">; +defm DS_READ_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">; +defm DS_WRITE_B96 : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">; +defm DS_WRITE_B128 : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">; +defm DS_READ_B96 : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">; +defm DS_READ_B128 : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">; // DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, but also the data operands (src and cmp) are swapped // comparing to pre-GFX11. From afac64cef40c77320cc49808be30f3e5ef7f7357 Mon Sep 17 00:00:00 2001 From: Matthias Gehre <93204396+mgehre-amd@users.noreply.github.com> Date: Thu, 29 Feb 2024 07:32:30 +0100 Subject: [PATCH 016/158] [MLIR] BufferResultsToOutParams: Allow to configure memCpyFn This allows us to configure the pass to emit linalg.copy instead of memref.copy. 
This is consistent with one-shot-bufferize, which also allows to configure the `memCpyFn`, see https://discord.com/channels/636084430946959380/642426447167881246/1211698722438783087 --- .../Dialect/Bufferization/Transforms/Passes.h | 8 ++++++ .../Transforms/BufferResultsToOutParams.cpp | 28 ++++++++++++++----- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index bb4b5221981638..809f03407258a8 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -149,11 +149,19 @@ std::unique_ptr createBufferLoopHoistingPass(); // Options struct for BufferResultsToOutParams pass. // Note: defined only here, not in tablegen. struct BufferResultsToOutParamsOptions { + /// Memcpy function: Generate a memcpy between two memrefs. + using MemCpyFn = + std::function; + // Filter function; returns true if the function should be converted. // Defaults to true, i.e. all functions are converted. llvm::function_ref filterFn = [](func::FuncOp *func) { return true; }; + + /// Memcpy function; used to create a copy between two memrefs. + /// If this is empty, memref.copy is used. + std::optional memCpyFn; }; /// Creates a pass that converts memref function results to out-params. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp index dd359c2dcca5dd..930f035339c1d3 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp @@ -21,6 +21,7 @@ namespace bufferization { } // namespace mlir using namespace mlir; +using MemCpyFn = bufferization::BufferResultsToOutParamsOptions::MemCpyFn; /// Return `true` if the given MemRef type has a fully dynamic layout. 
static bool hasFullyDynamicLayoutMap(MemRefType type) { @@ -97,9 +98,10 @@ updateFuncOp(func::FuncOp func, // Updates all ReturnOps in the scope of the given func::FuncOp by either // keeping them as return values or copying the associated buffer contents into // the given out-params. -static void updateReturnOps(func::FuncOp func, - ArrayRef appendedEntryArgs) { - func.walk([&](func::ReturnOp op) { +static LogicalResult updateReturnOps(func::FuncOp func, + ArrayRef appendedEntryArgs, + MemCpyFn memCpyFn) { + auto res = func.walk([&](func::ReturnOp op) { SmallVector copyIntoOutParams; SmallVector keepAsReturnOperands; for (Value operand : op.getOperands()) { @@ -109,12 +111,16 @@ static void updateReturnOps(func::FuncOp func, keepAsReturnOperands.push_back(operand); } OpBuilder builder(op); - for (auto t : llvm::zip(copyIntoOutParams, appendedEntryArgs)) - builder.create(op.getLoc(), std::get<0>(t), - std::get<1>(t)); + for (auto t : llvm::zip(copyIntoOutParams, appendedEntryArgs)) { + if (failed( + memCpyFn(builder, op.getLoc(), std::get<0>(t), std::get<1>(t)))) + return WalkResult::interrupt(); + } builder.create(op.getLoc(), keepAsReturnOperands); op.erase(); + return WalkResult::advance(); }); + return failure(res.wasInterrupted()); } // Updates all CallOps in the scope of the given ModuleOp by allocating @@ -192,7 +198,15 @@ LogicalResult mlir::bufferization::promoteBufferResultsToOutParams( return failure(); if (func.isExternal()) continue; - updateReturnOps(func, appendedEntryArgs); + auto defaultMemCpyFn = [](OpBuilder &builder, Location loc, Value from, + Value to) { + builder.create(loc, from, to); + return success(); + }; + if (failed(updateReturnOps(func, appendedEntryArgs, + options.memCpyFn.value_or(defaultMemCpyFn)))) { + return failure(); + } } if (failed(updateCalls(module, options))) return failure(); From 6f54a54c6f5f644b4f4c79882154fd9737568c8e Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Thu, 7 Mar 2024 10:33:26 +0000 Subject: 
[PATCH 017/158] [FMV] Remove duplicate features from mangled name. (#84165) ACLE suggests: https://github.com/ARM-software/acle/pull/308. GCC emits diagnostics for attribute strings which contain duplicate features, but for now let's follow the SPEC in regards to mangling rules and we can change the semantic behavior of the compiler later if there's value to it. --- clang/lib/CodeGen/Targets/AArch64.cpp | 4 +++- clang/test/CodeGen/attr-target-version.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 725e8a70fddfe6..85117366de0ee8 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -886,9 +886,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr, return LHS.compare(RHS) < 0; }); + llvm::SmallDenseSet UniqueFeats; for (auto &Feat : Features) if (auto Ext = llvm::AArch64::parseArchExtension(Feat)) - Out << 'M' << Ext->Name; + if (UniqueFeats.insert(Ext->Name).second) + Out << 'M' << Ext->Name; } std::unique_ptr diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index ae1a8772f6cc07..b7112c783da913 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -273,7 +273,7 @@ int hoo(void) { // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16Mfp16MrdmMsme +// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064 @@ -582,7 +582,7 @@ int hoo(void) { // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfcmaMfp16Mfp16MrdmMsme +// CHECK-LABEL: 
define {{[^@]+}}@fmv_inline._MfcmaMfp16MrdmMsme // CHECK-SAME: () #[[ATTR13:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 From c40146c214a705a232848144d9412c8a7c73f0fe Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Thu, 7 Mar 2024 11:34:11 +0100 Subject: [PATCH 018/158] [mlir][EmitC] Add Arith to EmitC conversions (#84151) This adds patterns and a pass to convert the Arith dialect to EmitC. For now, this covers arithemtic binary ops operating on floating point types. It is not checked within the patterns whether the types, such as the Tensor type, are supported in the respective EmitC operations. If unsupported types should be converted, the conversion will fail anyway because no legal EmitC operation can be created. This can clearly be improved in a follow up, also resulting in better error messages. Functions for such checks should not solely be used in the conversions and should also be (re)used in the verifier. --- .../Conversion/ArithToEmitC/ArithToEmitC.h | 20 +++++++ .../ArithToEmitC/ArithToEmitCPass.h | 21 +++++++ mlir/include/mlir/Conversion/Passes.h | 1 + mlir/include/mlir/Conversion/Passes.td | 9 +++ .../Conversion/ArithToEmitC/ArithToEmitC.cpp | 60 +++++++++++++++++++ .../ArithToEmitC/ArithToEmitCPass.cpp | 53 ++++++++++++++++ .../Conversion/ArithToEmitC/CMakeLists.txt | 16 +++++ mlir/lib/Conversion/CMakeLists.txt | 1 + .../ArithToEmitC/arith-to-emitc.mlir | 14 +++++ .../llvm-project-overlay/mlir/BUILD.bazel | 27 +++++++++ 10 files changed, 222 insertions(+) create mode 100644 mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitC.h create mode 100644 mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h create mode 100644 mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp create mode 100644 mlir/lib/Conversion/ArithToEmitC/ArithToEmitCPass.cpp create mode 100644 mlir/lib/Conversion/ArithToEmitC/CMakeLists.txt create mode 100644 mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir diff --git 
a/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitC.h b/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitC.h new file mode 100644 index 00000000000000..9cb43689d1ce64 --- /dev/null +++ b/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitC.h @@ -0,0 +1,20 @@ +//===- ArithToEmitC.h - Arith to EmitC Patterns -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITC_H +#define MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITC_H + +namespace mlir { +class RewritePatternSet; +class TypeConverter; + +void populateArithToEmitCPatterns(TypeConverter &typeConverter, + RewritePatternSet &patterns); +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITC_H diff --git a/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h b/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h new file mode 100644 index 00000000000000..6b98fed7185ead --- /dev/null +++ b/mlir/include/mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h @@ -0,0 +1,21 @@ +//===- ArithToEmitCPass.h - Arith to EmitC Pass -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITCPASS_H +#define MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITCPASS_H + +#include + +namespace mlir { +class Pass; + +#define GEN_PASS_DECL_CONVERTARITHTOEMITC +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHTOEMITC_ARITHTOEMITCPASS_H diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 81f69210fade8d..f2aa4fb535402d 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -13,6 +13,7 @@ #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" #include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h" +#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ArithToSPIRV/ArithToSPIRV.h" #include "mlir/Conversion/ArmNeon2dToIntr/ArmNeon2dToIntr.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 94fc7a7d2194bf..bd81cc6d5323bf 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -133,6 +133,15 @@ def ArithToAMDGPUConversionPass : Pass<"convert-arith-to-amdgpu"> { ]; } +//===----------------------------------------------------------------------===// +// ArithToEmitC +//===----------------------------------------------------------------------===// + +def ConvertArithToEmitC : Pass<"convert-arith-to-emitc"> { + let summary = "Convert Arith dialect to EmitC dialect"; + let dependentDialects = ["emitc::EmitCDialect"]; +} + //===----------------------------------------------------------------------===// // ArithToLLVM //===----------------------------------------------------------------------===// diff --git 
a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp new file mode 100644 index 00000000000000..6909534d4790fe --- /dev/null +++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp @@ -0,0 +1,60 @@ +//===- ArithToEmitC.cpp - Arith to EmitC Patterns ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements patterns to convert the Arith dialect to the EmitC +// dialect. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithToEmitC/ArithToEmitC.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Conversion Patterns +//===----------------------------------------------------------------------===// + +namespace { +template +class ArithOpConversion final : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ArithOp arithOp, typename ArithOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + rewriter.template replaceOpWithNewOp(arithOp, arithOp.getType(), + adaptor.getOperands()); + + return success(); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Pattern population +//===----------------------------------------------------------------------===// + +void mlir::populateArithToEmitCPatterns(TypeConverter &typeConverter, + RewritePatternSet &patterns) { + MLIRContext *ctx = patterns.getContext(); + + // 
clang-format off + patterns.add< + ArithOpConversion, + ArithOpConversion, + ArithOpConversion, + ArithOpConversion + >(typeConverter, ctx); + // clang-format on +} diff --git a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitCPass.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitCPass.cpp new file mode 100644 index 00000000000000..b377c063a7aa0e --- /dev/null +++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitCPass.cpp @@ -0,0 +1,53 @@ +//===- ArithToEmitCPass.cpp - Arith to EmitC Pass ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to convert the Arith dialect to the EmitC +// dialect. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" + +#include "mlir/Conversion/ArithToEmitC/ArithToEmitC.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { +#define GEN_PASS_DEF_CONVERTARITHTOEMITC +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +namespace { +struct ConvertArithToEmitC + : public impl::ConvertArithToEmitCBase { + void runOnOperation() override; +}; +} // namespace + +void ConvertArithToEmitC::runOnOperation() { + ConversionTarget target(getContext()); + + target.addLegalDialect(); + target.addIllegalDialect(); + target.addLegalOp(); + + RewritePatternSet patterns(&getContext()); + + TypeConverter typeConverter; + typeConverter.addConversion([](Type type) { return type; }); + + populateArithToEmitCPatterns(typeConverter, patterns); + + if (failed( + applyPartialConversion(getOperation(), target, 
std::move(patterns)))) + signalPassFailure(); +} diff --git a/mlir/lib/Conversion/ArithToEmitC/CMakeLists.txt b/mlir/lib/Conversion/ArithToEmitC/CMakeLists.txt new file mode 100644 index 00000000000000..a3784f47c3bc2d --- /dev/null +++ b/mlir/lib/Conversion/ArithToEmitC/CMakeLists.txt @@ -0,0 +1,16 @@ +add_mlir_conversion_library(MLIRArithToEmitC + ArithToEmitC.cpp + ArithToEmitCPass.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ArithToEmitC + + DEPENDS + MLIRConversionPassIncGen + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIREmitCDialect + MLIRPass + MLIRTransformUtils + ) diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index 9e421f7c49dbc3..8219cf98575f3c 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(AMDGPUToROCDL) add_subdirectory(ArithCommon) add_subdirectory(ArithToAMDGPU) add_subdirectory(ArithToArmSME) +add_subdirectory(ArithToEmitC) add_subdirectory(ArithToLLVM) add_subdirectory(ArithToSPIRV) add_subdirectory(ArmNeon2dToIntr) diff --git a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir new file mode 100644 index 00000000000000..6a56474a5c48b2 --- /dev/null +++ b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt -convert-arith-to-emitc %s | FileCheck %s + +func.func @arith_ops(%arg0: f32, %arg1: f32) { + // CHECK: [[V0:[^ ]*]] = emitc.add %arg0, %arg1 : (f32, f32) -> f32 + %0 = arith.addf %arg0, %arg1 : f32 + // CHECK: [[V1:[^ ]*]] = emitc.div %arg0, %arg1 : (f32, f32) -> f32 + %1 = arith.divf %arg0, %arg1 : f32 + // CHECK: [[V2:[^ ]*]] = emitc.mul %arg0, %arg1 : (f32, f32) -> f32 + %2 = arith.mulf %arg0, %arg1 : f32 + // CHECK: [[V3:[^ ]*]] = emitc.sub %arg0, %arg1 : (f32, f32) -> f32 + %3 = arith.subf %arg0, %arg1 : f32 + + return +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 86b38ebd2217b9..9d6ca4ed932fe4 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4014,6 +4014,7 @@ cc_library( ":AffineToStandard", ":ArithToAMDGPU", ":ArithToArmSME", + ":ArithToEmitC", ":ArithToLLVM", ":ArithToSPIRV", ":ArmNeon2dToIntr", @@ -8162,6 +8163,32 @@ cc_library( ], ) +cc_library( + name = "ArithToEmitC", + srcs = glob([ + "lib/Conversion/ArithToEmitC/*.cpp", + "lib/Conversion/ArithToEmitC/*.h", + ]), + hdrs = glob([ + "include/mlir/Conversion/ArithToEmitC/*.h", + ]), + includes = [ + "include", + "lib/Conversion/ArithToEmitC", + ], + deps = [ + ":ArithDialect", + ":ConversionPassIncGen", + ":EmitCDialect", + ":IR", + ":Pass", + ":Support", + ":TransformUtils", + ":Transforms", + "//llvm:Support", + ], +) + cc_library( name = "ArithToLLVM", srcs = glob(["lib/Conversion/ArithToLLVM/*.cpp"]), From 245d669f1d1c3f66d0d3d8aa7cffa5ef0d7747ff Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 7 Mar 2024 11:41:35 +0100 Subject: [PATCH 019/158] [reland][libc] Remove UB specializations of type traits for `BigInt` (#84299) Note: This is a reland of #84035. The standard specifies that it it UB to specialize the following traits: - `std::is_integral` - `std::is_unsigned` - `std::make_unsigned` - `std::make_signed` This patch: - Removes specializations for `BigInt` - Transforms SFINAE for `bit.h` functions from template parameter to return type (This makes specialization easier). - Adds `BigInt` specialization for `bit.h` functions. - Fixes code depending on previous specializations. 
--- libc/src/__support/CMakeLists.txt | 1 + libc/src/__support/CPP/bit.h | 112 ++++++---- libc/src/__support/UInt.h | 192 ++++++++++++------ libc/src/__support/float_to_string.h | 2 +- libc/src/__support/integer_to_string.h | 19 +- libc/test/UnitTest/CMakeLists.txt | 1 + libc/test/UnitTest/LibcTest.cpp | 10 +- libc/test/UnitTest/LibcTest.h | 1 + libc/test/UnitTest/TestLogger.cpp | 8 +- libc/test/src/__support/CPP/bit_test.cpp | 47 +++-- .../test/src/__support/FPUtil/fpbits_test.cpp | 2 + .../llvm-project-overlay/libc/BUILD.bazel | 1 + .../libc/test/UnitTest/BUILD.bazel | 1 + 13 files changed, 272 insertions(+), 125 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 1a4b3e9a2145c0..17c04aa57e6fd6 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -95,6 +95,7 @@ add_header_library( HDRS integer_to_string.h DEPENDS + .uint libc.src.__support.common libc.src.__support.CPP.algorithm libc.src.__support.CPP.limits diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 7d11e7d5c497e0..bc2f595845a95f 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -27,13 +27,14 @@ namespace LIBC_NAMESPACE::cpp { // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. 
-template < - typename To, typename From, - typename = cpp::enable_if_t::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value, + To> +bit_cast(const From &from) { MSAN_UNPOISON(&from, sizeof(From)); #if LIBC_HAS_BUILTIN(__builtin_bit_cast) return __builtin_bit_cast(To, from); @@ -51,8 +52,10 @@ LIBC_INLINE constexpr To bit_cast(const From &from) { #endif // LIBC_HAS_BUILTIN(__builtin_bit_cast) } -template >> -[[nodiscard]] LIBC_INLINE constexpr bool has_single_bit(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, + bool> +has_single_bit(T value) { return (value != 0) && ((value & (value - 1)) == 0); } @@ -70,8 +73,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(T value) { if (!value) return cpp::numeric_limits::digits; if (value & 0x1) @@ -103,8 +107,9 @@ ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(T value) { if (!value) return cpp::numeric_limits::digits; // Bisection method. @@ -135,8 +140,9 @@ ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { return cpp::countl_zero(~value); } @@ -147,8 +153,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { return cpp::countr_zero(~value); } @@ -156,8 +163,9 @@ template >> /// Returns 0 otherwise. /// /// Ex. bit_width(5) == 3. -template >> -[[nodiscard]] LIBC_INLINE constexpr int bit_width(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { return cpp::numeric_limits::digits - cpp::countl_zero(value); } @@ -165,8 +173,9 @@ template >> /// nonzero. Returns 0 otherwise. /// /// Ex. bit_floor(5) == 4. -template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_floor(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_floor(T value) { if (!value) return 0; return T(1) << (cpp::bit_width(value) - 1); @@ -179,8 +188,9 @@ template >> /// /// The return value is undefined if the input is larger than the largest power /// of two representable in T. -template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_ceil(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_ceil(T value) { if (value < 2) return 1; return T(1) << cpp::bit_width(value - 1u); @@ -190,28 +200,31 @@ template >> // from https://blog.regehr.org/archives/1063. // Forward-declare rotr so that rotl can use it. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate); +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); -template >> -[[nodiscard]] LIBC_INLINE constexpr T rotl(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotr(value, -rotate); + return cpp::rotr(value, -rotate); return (value << rotate) | (value >> (N - rotate)); } -template -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotl(value, -rotate); + return cpp::rotl(value, -rotate); return (value >> rotate) | (value << (N - rotate)); } @@ -226,33 +239,44 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { } } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_leading_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_leading_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_one(T value) { return first_leading_zero(static_cast(~value)); } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(static_cast(~value)) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } /// Count number of 1's aka population count or hamming weight. /// /// Only unsigned integral types are allowed. -template >> -[[nodiscard]] LIBC_INLINE constexpr int count_ones(T value) { +// TODO: rename as 'popcount' to follow the standard +// https://en.cppreference.com/w/cpp/numeric/popcount +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +count_ones(T value) { int count = 0; for (int i = 0; i != cpp::numeric_limits::digits; ++i) if ((value >> i) & 0x1) @@ -272,8 +296,10 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) // TODO: 128b specializations? #undef ADD_SPECIALIZATION -template >> -[[nodiscard]] LIBC_INLINE constexpr int count_zeros(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +count_zeros(T value) { return count_ones(static_cast(~value)); } diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 5973e6fab1d7d5..b3d8f00b9a01a5 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -43,6 +43,9 @@ struct BigInt { static_assert(is_integral_v && is_unsigned_v, "WordType must be unsigned integer."); + using word_type = WordType; + LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; + LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = sizeof(WordType) * CHAR_BIT; @@ -50,6 +53,10 @@ struct BigInt { "Number of bits in BigInt should be a multiple of WORD_SIZE."); LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; + + using unsigned_type = BigInt; + using signed_type = BigInt; + cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; @@ -579,19 +586,33 @@ struct BigInt { return *this; } - LIBC_INLINE constexpr uint64_t clz() { - uint64_t leading_zeroes = 0; - for (size_t i = WORD_COUNT; i > 0; --i) { - if (val[i - 1] == 0) { - leading_zeroes += WORD_SIZE; - } else { - leading_zeroes += countl_zero(val[i - 1]); + // TODO: remove and use cpp::countl_zero below. + [[nodiscard]] LIBC_INLINE constexpr int clz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int leading_zeroes = 0; + for (auto i = val.size(); i > 0;) { + --i; + const int zeroes = countl_zero(val[i]); + leading_zeroes += zeroes; + if (zeroes != word_digits) break; - } } return leading_zeroes; } + // TODO: remove and use cpp::countr_zero below. 
+ [[nodiscard]] LIBC_INLINE constexpr int ctz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int trailing_zeroes = 0; + for (auto word : val) { + const int zeroes = countr_zero(word); + trailing_zeroes += zeroes; + if (zeroes != word_digits) + break; + } + return trailing_zeroes; + } + LIBC_INLINE constexpr void shift_left(size_t s) { if constexpr (Bits == WORD_SIZE) { // Use native types if possible. @@ -916,66 +937,123 @@ template <> class numeric_limits> { LIBC_INLINE_VAR static constexpr int digits = 128; }; -// Provides is_integral of U/Int<128>, U/Int<192>, U/Int<256>. -template -struct is_integral> : cpp::true_type {}; +// type traits to determine whether a T is a cpp::BigInt. +template struct is_big_int : cpp::false_type {}; -// Provides is_unsigned of UInt<128>, UInt<192>, UInt<256>. template -struct is_unsigned> : cpp::bool_constant {}; - -template -struct make_unsigned> - : type_identity> {}; - -template -struct make_signed> - : type_identity> {}; - -namespace internal { -template struct is_custom_uint : cpp::false_type {}; - -template -struct is_custom_uint> : cpp::true_type {}; -} // namespace internal - -// bit_cast to UInt -// Note: The standard scheme for SFINAE selection is to have exactly one -// function instanciation valid at a time. This is usually done by having a -// predicate in one function and the negated predicate in the other one. -// e.g. -// template::value == true> ... -// template::value == false> ... -// -// Unfortunately this would make the default 'cpp::bit_cast' aware of -// 'is_custom_uint' (or any other customization). To prevent exposing all -// customizations in the original function, we create a different function with -// four 'typename's instead of three - otherwise it would be considered as a -// redeclaration of the same function leading to "error: template parameter -// redefines default argument". 
-template ::value && - cpp::is_trivially_copyable::value>, - typename = cpp::enable_if_t::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +struct is_big_int> : cpp::true_type {}; + +template +LIBC_INLINE_VAR constexpr bool is_big_int_v = is_big_int::value; + +// Specialization of cpp::bit_cast ('bit.h') from T to BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value && is_big_int::value, + To> +bit_cast(const From &from) { To out; using Storage = decltype(out.val); out.val = cpp::bit_cast(from); return out; } -// bit_cast from UInt -template < - typename To, size_t Bits, - typename = cpp::enable_if_t) && - cpp::is_trivially_constructible::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable>::value>> -LIBC_INLINE constexpr To bit_cast(const UInt &from) { +// Specialization of cpp::bit_cast ('bit.h') from BigInt to T. +template +LIBC_INLINE constexpr cpp::enable_if_t< + sizeof(To) == sizeof(UInt) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable>::value, + To> +bit_cast(const UInt &from) { return cpp::bit_cast(from.val); } +// Specialization of cpp::has_single_bit ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, bool> +has_single_bit(T value) { + int bits = 0; + for (auto word : value.val) { + if (word == 0) + continue; + bits += count_ones(word); + if (bits > 1) + return false; + } + return bits == 1; +} + +// Specialization of cpp::countr_zero ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(const T &value) { + return value.ctz(); +} + +// Specialization of cpp::countl_zero ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(const T &value) { + return value.clz(); +} + +// Specialization of cpp::countl_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countl_zero(~value); +} + +// Specialization of cpp::countr_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countr_zero(~value); +} + +// Specialization of cpp::bit_width ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { + return cpp::numeric_limits::digits - cpp::countl_zero(value); +} + +// Forward-declare rotr so that rotl can use it. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); + +// Specialization of cpp::rotl ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotr(value, -rotate); + return (value << rotate) | (value >> (N - rotate)); +} + +// Specialization of cpp::rotr ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotl(value, -rotate); + return (value >> rotate) | (value << (N - rotate)); +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 744842ced8d772..27476433a94575 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -713,7 +713,7 @@ template <> class FloatToString { float_as_fixed.shift_left(SHIFT_AMOUNT); // If there are still digits above the decimal point, handle those. - if (float_as_fixed.clz() < EXTRA_INT_WIDTH) { + if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { cpp::UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 81ed21ccfca166..a5872dce652036 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -67,6 +67,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/UInt.h" // is_big_int #include "src/__support/common.h" namespace LIBC_NAMESPACE { @@ -149,6 +150,18 @@ template class StringBufferWriterImpl { using StringBufferWriter = StringBufferWriterImpl; using BackwardStringBufferWriter = StringBufferWriterImpl; +template struct IntegerWriterUnsigned {}; + +template +struct IntegerWriterUnsigned>> { + using type = cpp::make_unsigned_t; +}; + +template +struct IntegerWriterUnsigned>> { + using type = typename T::unsigned_type; +}; + } // namespace details namespace radix { @@ -163,7 +176,7 @@ template using Custom = details::Fmt; // See file header for documentation. 
template class IntegerToString { - static_assert(cpp::is_integral_v); + static_assert(cpp::is_integral_v || cpp::is_big_int_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { constexpr auto MAX_DIGITS = []() -> size_t { @@ -208,8 +221,8 @@ template class IntegerToString { // An internal stateless structure that handles the number formatting logic. struct IntegerWriter { - static_assert(cpp::is_integral_v); - using UNSIGNED_T = cpp::make_unsigned_t; + static_assert(cpp::is_integral_v || cpp::is_big_int_v); + using UNSIGNED_T = typename details::IntegerWriterUnsigned::type; LIBC_INLINE static char digit_char(uint8_t digit) { if (digit < 10) diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 4668f0061975f8..36837c553efce1 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -74,6 +74,7 @@ add_unittest_framework_library( libc.src.__support.CPP.type_traits libc.src.__support.fixed_point.fx_rep libc.src.__support.OSUtil.osutil + libc.src.__support.uint libc.src.__support.uint128 ) diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index 7b0e4fca83683b..0340f7ed37100e 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -38,7 +38,8 @@ TestLogger &operator<<(TestLogger &logger, Location Loc) { // When the value is UInt128, __uint128_t or wider, show its hexadecimal // digits. template -cpp::enable_if_t && (sizeof(T) > sizeof(uint64_t)), +cpp::enable_if_t<(cpp::is_integral_v && (sizeof(T) > sizeof(uint64_t))) || + cpp::is_big_int_v, cpp::string> describeValue(T Value) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); @@ -47,11 +48,10 @@ describeValue(T Value) { } // When the value is of a standard integral type, just display it as normal. 
-template -cpp::enable_if_t && - sizeof(ValType) <= sizeof(uint64_t), +template +cpp::enable_if_t && (sizeof(T) <= sizeof(uint64_t)), cpp::string> -describeValue(ValType Value) { +describeValue(T Value) { return cpp::to_string(Value); } diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index 639f6005832576..d26d6490bcb572 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -127,6 +127,7 @@ class Test { // of type promotion. template || + cpp::is_big_int_v || cpp::is_fixed_point_v, int> = 0> bool test(TestCond Cond, ValType LHS, ValType RHS, const char *LHSStr, diff --git a/libc/test/UnitTest/TestLogger.cpp b/libc/test/UnitTest/TestLogger.cpp index 6bb0e17dc3888e..469b3a11d57d9b 100644 --- a/libc/test/UnitTest/TestLogger.cpp +++ b/libc/test/UnitTest/TestLogger.cpp @@ -2,6 +2,7 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" #include "src/__support/OSUtil/io.h" // write_to_stderr +#include "src/__support/UInt.h" // is_big_int #include "src/__support/UInt128.h" #include @@ -47,8 +48,9 @@ template <> TestLogger &TestLogger::operator<<(void *addr) { } template TestLogger &TestLogger::operator<<(T t) { - if constexpr (cpp::is_integral_v && cpp::is_unsigned_v && - sizeof(T) > sizeof(uint64_t)) { + if constexpr (cpp::is_big_int_v || + (cpp::is_integral_v && cpp::is_unsigned_v && + (sizeof(T) > sizeof(uint64_t)))) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); const IntegerToString buffer(t); return *this << buffer.view(); @@ -68,7 +70,7 @@ template TestLogger &TestLogger::operator<< (unsigned short); template TestLogger &TestLogger::operator<< (unsigned int); template TestLogger &TestLogger::operator<< (unsigned long); template TestLogger & -TestLogger::operator<< (unsigned long long); + TestLogger::operator<< (unsigned long long); #ifdef __SIZEOF_INT128__ template TestLogger &TestLogger::operator<< <__uint128_t>(__uint128_t); diff --git 
a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 115a5d505c4b7a..25a80ca9209c2f 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -14,19 +14,40 @@ namespace LIBC_NAMESPACE::cpp { -using UnsignedTypes = - testing::TypeList>; + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long>; + +using UnsignedTypes = testing::TypeList< +#if defined(__SIZEOF_INT128__) + __uint128_t, +#endif + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long, cpp::UInt<128>>; TYPED_TEST(LlvmLibcBitTest, HasSingleBit, UnsignedTypes) { - EXPECT_FALSE(has_single_bit(T(0))); - EXPECT_FALSE(has_single_bit(~T(0))); + constexpr auto ZERO = T(0); + constexpr auto ALL_ONES = T(~ZERO); + EXPECT_FALSE(has_single_bit(ZERO)); + EXPECT_FALSE(has_single_bit(ALL_ONES)); + for (T value = 1; value; value <<= 1) EXPECT_TRUE(has_single_bit(value)); + + // We test that if two bits are set has_single_bit returns false. + // We do this by setting the highest or lowest bit depending or where the + // current bit is. This is a bit convoluted but it helps catch a bug on BigInt + // where we have to work on an element-by-element basis. + constexpr auto MIDPOINT = T(ALL_ONES / 2); + constexpr auto LSB = T(1); + constexpr auto MSB = T(~(ALL_ONES >> 1)); + for (T value = 1; value; value <<= 1) { + auto two_bits_value = value | ((value <= MIDPOINT) ? 
MSB : LSB); + EXPECT_FALSE(has_single_bit(two_bits_value)); + } } TYPED_TEST(LlvmLibcBitTest, CountLZero, UnsignedTypes) { @@ -206,39 +227,39 @@ TEST(LlvmLibcBitTest, Rotr) { rotr(0x12345678deadbeefULL, -19)); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_zero(~(T(1) << i)), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_one(static_cast(0)), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_one(T(1) << i), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_one(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); } -TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits::digits); for (int i = 0; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } -TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) { EXPECT_EQ(count_ones(T(0)), 0); for (int i = 0; i != cpp::numeric_limits::digits; ++i) 
EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index f5c27d4fc0302b..760031569c81f1 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -237,6 +237,8 @@ template constexpr auto make(Sign sign, FP fp) { return T::signaling_nan(sign); case FP::QUIET_NAN: return T::quiet_nan(sign); + default: + __builtin_unreachable(); } } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 49a454379e1c7a..5c6cf761ebe7de 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -507,6 +507,7 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", + ":__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index a5c18fbb68b398..44692947af7c08 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -18,6 +18,7 @@ libc_support_library( "//libc:__support_cpp_string", "//libc:__support_cpp_string_view", "//libc:__support_osutil_io", + "//libc:__support_uint", "//libc:__support_uint128", ], ) From 55304d0d907fb26c298b84447a85e3a987d0adbc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 7 Mar 2024 10:46:27 +0000 Subject: [PATCH 020/158] [CostModel] getInstructionCost - improve estimation of costs for length changing shuffles (#84156) Fix gap in the cost estimation for length changing shuffles, by adjusting the shuffle mask and either widening the shuffle inputs or extracting the lower elements of the result. 
A small step towards moving some of this implementation inside improveShuffleKindFromMask and/or target getShuffleCost handlers (and reduce the diffs in cost estimation depending on whether coming from a ShuffleVectorInst or the raw operands / mask components) --- .../llvm/Analysis/TargetTransformInfoImpl.h | 31 +- .../CostModel/AMDGPU/shufflevector.ll | 368 +++++++++--------- .../RISCV/shuffle-extract_subvector.ll | 4 +- .../CostModel/RISCV/shuffle-interleave.ll | 4 +- .../X86/shuffle-concat_subvector-codesize.ll | 170 +++++--- .../X86/shuffle-concat_subvector-latency.ll | 178 +++++---- .../shuffle-concat_subvector-sizelatency.ll | 170 +++++--- .../CostModel/X86/shuffle-concat_subvector.ll | 178 +++++---- .../X86/shuffle-extract_subvector-codesize.ll | 39 +- .../X86/shuffle-extract_subvector-latency.ll | 41 +- .../shuffle-extract_subvector-sizelatency.ll | 39 +- .../X86/shuffle-extract_subvector.ll | 41 +- 12 files changed, 756 insertions(+), 507 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 71573b6b3b8f77..095c2ff1e58bdb 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1327,6 +1327,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { ArrayRef Mask = Shuffle->getShuffleMask(); int NumSubElts, SubIndex; + // TODO: move more of this inside improveShuffleKindFromMask. if (Shuffle->changesLength()) { // Treat a 'subvector widening' as a free shuffle. if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) @@ -1355,7 +1356,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { DemandedDstElts, CostKind); } - return CostKind == TTI::TCK_RecipThroughput ? 
-1 : 1; + bool IsUnary = isa(Operands[1]); + NumSubElts = VecSrcTy->getElementCount().getKnownMinValue(); + SmallVector AdjustMask(Mask.begin(), Mask.end()); + + // Widening shuffle - widening the source(s) to the new length + // (treated as free - see above), and then perform the adjusted + // shuffle at that width. + if (Shuffle->increasesLength()) { + for (int &M : AdjustMask) + M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M; + + return TargetTTI->getShuffleCost( + IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy, + AdjustMask, CostKind, 0, nullptr); + } + + // Narrowing shuffle - perform shuffle at original wider width and + // then extract the lower elements. + AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem); + + InstructionCost ShuffleCost = TargetTTI->getShuffleCost( + IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, + VecSrcTy, AdjustMask, CostKind, 0, nullptr); + + SmallVector ExtractMask(Mask.size()); + std::iota(ExtractMask.begin(), ExtractMask.end(), 0); + return ShuffleCost + TargetTTI->getShuffleCost( + TTI::SK_ExtractSubvector, VecTy, ExtractMask, + CostKind, 0, VecSrcTy, Operands); } if (Shuffle->isIdentity()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index 58f9dd3633e2c4..be5cca0765edf1 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -25,27 +25,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; GFX9-10-NEXT: Cost 
Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 
x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shufflevector_i16' @@ -65,27 +65,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an 
estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> 
undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shufflevector_i16' @@ -105,27 +105,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; 
GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x 
i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shufflevector_i16' @@ -145,27 +145,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> 
undef, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer @@ -227,27 +227,27 @@ define amdgpu_kernel void @shufflevector_i8() { ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i8> 
undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer +; 
ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i8' @@ -267,27 +267,27 @@ define amdgpu_kernel void @shufflevector_i8() { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: 
Cost Model: Found an estimated cost of 6 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer @@ -348,27 +348,27 @@ define amdgpu_kernel void @shufflevector_i32() { ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = 
shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf110 = shufflevector <2 
x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x 
i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i32' @@ -388,27 +388,27 @@ define amdgpu_kernel void @shufflevector_i32() { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf112 = shufflevector <2 x i32> undef, 
<2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer @@ -455,11 +455,11 @@ define amdgpu_kernel void @shufflevector_i32() { define void @shuffle() { ; GFX9-10-LABEL: 'shuffle' ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x 
i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -475,11 +475,11 @@ define void @shuffle() { ; ; VI-LABEL: 'shuffle' ; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = 
shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -495,11 +495,11 @@ define void @shuffle() { ; ; GFX9-10-SIZE-LABEL: 'shuffle' ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; 
GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -515,11 +515,11 @@ define void @shuffle() { ; ; VI-SIZE-LABEL: 'shuffle' ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 
for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index 3ac2b7e26650ab..b84f22907cc71f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' @@ -32,7 +32,7 @@ define void @test_vXf64(<4 x double> 
%src256, <8 x double> %src512) { ; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; VLEN128-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; VLEN128-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll index afcf600e13ef64..dd67772042cbd3 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll @@ -56,8 +56,8 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) { ; TODO: getInstructionCost doesn't call getShuffleCost here because the shuffle changes length define {<4 x i8>, <4 x i8>} @deinterleave_2(<8 x i8> %v) { ; CHECK-LABEL: 'deinterleave_2' -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res0 = insertvalue { <4 x i8>, <4 x i8> } poison, <4 x i8> %v0, 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res1 = insertvalue { <4 x i8>, <4 x i8> } %res0, <4 x i8> %v1, 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret { <4 x i8>, <4 x i8> } %res1 diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll index 0c1c085f5afc19..61d99c20fa9668 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu 
-passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. 
@@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> 
%a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,17 +77,24 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -106,17 +113,24 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x 
i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, 
<128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll index 7244b7b4f00f5d..0f8503cd1c3ccf 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output 
-cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. 
@@ -19,21 +19,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, 
<2 x double> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,21 +48,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 
= shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,21 +77,28 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated 
cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector 
<4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -106,21 +113,28 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: 
Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x 
i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> 
%a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x 
i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll index b1fa00b5a71351..8c4f55eb8adcb2 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < 
%s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output 
-cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost 
of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,17 +77,24 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x 
float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -106,17 +113,24 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> 
%b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = 
shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> 
%a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll index 60cb8cffd1a595..ffc470d5f3448c 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output 
-mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. 
@@ -19,21 +19,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, 
<2 x double> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -48,21 +48,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: 
%V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -77,21 +77,28 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = 
shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -106,21 +113,28 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; 
SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 
x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> 
%b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = 
shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: 
%V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll index 9a86fbe105ec29..91314d39690520 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = 
shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll index 393dec82428b34..33431083c68bb3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x 
double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> @@ -53,7 +66,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll index 63bb07bf4fd894..039758e0b6e384 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x 
i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index b521a759484670..6a82a4a7432ef8 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x 
double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> @@ -53,7 +66,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost 
of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> From 5830d1a2dff24d752459f215a0c8fc366f393596 Mon Sep 17 00:00:00 2001 From: martinboehme Date: Thu, 7 Mar 2024 11:48:51 +0100 Subject: [PATCH 021/158] Revert "[dataflow][nfc] Fix u8 string usage with c++20" (#84301) Reverts llvm/llvm-project#84291 The patch broke Windows builds. --- clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp index d9f40d28859f5e..ff4e18de2c70f1 100644 --- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -500,7 +500,7 @@ class HTMLLogger : public Logger { for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { std::string Name = blockID(I); // Rightwards arrow, vertical line - char ConvergenceMarker[] = "\\n\u2192\u007c"; + char ConvergenceMarker[] = u8"\\n\u2192\u007c"; if (BlockConverged[I]) Name += ConvergenceMarker; GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" From 4119042d76c79667b374ad85b3b92ef56cfd96e8 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Mar 2024 10:58:33 +0000 Subject: [PATCH 022/158] [AMDGPU] Simplify EXP Real instruction definitions. NFC. Pass the Pseudo (instead of its name) into EXP_Real_Row and EXP_Real_ComprVM since it is already available in all subclasses. 
--- llvm/lib/Target/AMDGPU/EXPInstructions.td | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index cce8734b72d4a9..b73b83031af0d6 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -37,18 +37,18 @@ class EXP_Pseudo } // Real instruction with optional asm operands "compr" and "vm". -class EXP_Real_ComprVM(pseudo)> +class EXP_Real_ComprVM : EXPCommon<0, ps.done, "exp$tgt $src0, $src1, $src2, $src3" #!if(ps.done, " done", "")#"$compr$vm">, - SIMCInstr { + SIMCInstr { let AsmMatchConverter = "cvtExp"; } // Real instruction with optional asm operand "row_en". -class EXP_Real_Row(pseudo)> +class EXP_Real_Row : EXPCommon, - SIMCInstr { + SIMCInstr { let AsmMatchConverter = "cvtExp"; } @@ -71,7 +71,7 @@ def EXP_ROW_DONE : EXP_Pseudo<1, 1>; multiclass EXP_Real_si { defvar ps = !cast(NAME); - def _si : EXP_Real_ComprVM, EXPe_ComprVM { + def _si : EXP_Real_ComprVM, EXPe_ComprVM { let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; let done = ps.done; @@ -80,7 +80,7 @@ multiclass EXP_Real_si { multiclass EXP_Real_vi { defvar ps = !cast(NAME); - def _vi : EXP_Real_ComprVM, EXPe_vi { + def _vi : EXP_Real_ComprVM, EXPe_vi { let AssemblerPredicate = isGFX8GFX9; let SubtargetPredicate = isNotGFX90APlus; let DecoderNamespace = "GFX8"; @@ -90,7 +90,7 @@ multiclass EXP_Real_vi { multiclass EXP_Real_gfx10 { defvar ps = !cast(NAME); - def _gfx10 : EXP_Real_ComprVM, EXPe_ComprVM { + def _gfx10 : EXP_Real_ComprVM, EXPe_ComprVM { let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let done = ps.done; @@ -106,7 +106,7 @@ defm EXP_DONE : EXP_Real_si, EXP_Real_vi, EXP_Real_gfx10; multiclass EXP_Real_gfx11 { defvar ps = !cast(NAME); - def _gfx11 : EXP_Real_Row, EXPe_Row { + def _gfx11 : EXP_Real_Row, EXPe_Row { let AssemblerPredicate = isGFX11Only; let DecoderNamespace = "GFX11"; 
let row = ps.row; @@ -116,7 +116,7 @@ multiclass EXP_Real_gfx11 { multiclass VEXPORT_Real_gfx12 { defvar ps = !cast(NAME); - def _gfx12 : EXP_Real_Row, + def _gfx12 : EXP_Real_Row, EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus, HasExportInsts]> { let AssemblerPredicate = isGFX12Only; let DecoderNamespace = "GFX12"; From 937a5396cf3e524ae40106a943a5c1f2c565fa00 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 7 Mar 2024 12:32:25 +0100 Subject: [PATCH 023/158] [libc++] Remove unused includes from __type_traits/is_convertible.h (#83747) --- libcxx/include/__type_traits/is_convertible.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libcxx/include/__type_traits/is_convertible.h b/libcxx/include/__type_traits/is_convertible.h index bc91d8b234308a..414c2a6d6a0de0 100644 --- a/libcxx/include/__type_traits/is_convertible.h +++ b/libcxx/include/__type_traits/is_convertible.h @@ -11,12 +11,6 @@ #include <__config> #include <__type_traits/integral_constant.h> -#include <__type_traits/is_array.h> -#include <__type_traits/is_function.h> -#include <__type_traits/is_void.h> -#include <__type_traits/remove_reference.h> -#include <__utility/declval.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header From 0086cc95b3b3ac4088d3d782cd490d0c08108b59 Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Thu, 7 Mar 2024 12:46:42 +0100 Subject: [PATCH 024/158] [AMDGPU] Rename getNumVGPRBlocks. NFC (#84161) Rename getNumVGPRBlocks to getEncodedNumVGPRBlocks, to clarify that it's using the encoding granule. This is used to program the hardware. In practice, the hardware will use the alloc granule instead, so this patch also adds a new helper, getAllocatedNumVGPRBlocks, which can be useful when driving heuristics. 
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 +-- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 +-- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 26 +++++++++++++------ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 15 ++++++++--- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 37a36b26b947c6..d9970a200804ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -868,8 +868,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( &STM, ProgInfo.NumSGPRsForWavesPerEU); - ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - &STM, ProgInfo.NumVGPRsForWavesPerEU); + ProgInfo.VGPRBlocks = + IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU); const SIModeRegisterDefaults Mode = MFI->getMode(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cb4eddfe5320fa..d5efd441556252 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5344,8 +5344,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks( NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } - VGPRBlocks = - IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); + VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs, + EnableWavefrontSize32); SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); return false; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0eab7acc9ebce9..62903a244dc892 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1060,10 +1060,15 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 
STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } +static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, + unsigned Granule) { + return divideCeil(std::max(1u, NumRegs), Granule); +} + unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(STI) - 1; + return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) - + 1; } unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, @@ -1158,14 +1163,19 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, - std::optional EnableWavefrontSize32) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), - getVGPREncodingGranule(STI, EnableWavefrontSize32)); - // VGPRBlocks is actual number of VGPR blocks minus 1. 
- return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; +unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) - + 1; } +unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, + unsigned NumVGPRs, + std::optional EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32)); +} } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 6edf01d1217f2d..bb307cb67c9b79 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -316,13 +316,20 @@ unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs); /// \returns Number of VGPR blocks needed for given subtarget \p STI when -/// \p NumVGPRs are used. +/// \p NumVGPRs are used. We actually return the number of blocks -1, since +/// that's what we encode. /// /// For subtargets which support it, \p EnableWavefrontSize32 should match the /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. -unsigned -getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, - std::optional EnableWavefrontSize32 = std::nullopt); +unsigned getEncodedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32 = std::nullopt); + +/// \returns Number of VGPR blocks that need to be allocated for the given +/// subtarget \p STI when \p NumVGPRs are used. 
+unsigned getAllocatedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32 = std::nullopt); } // end namespace IsaInfo From a11ab139e4de9cdad41c299f198515c09be6f05d Mon Sep 17 00:00:00 2001 From: martinboehme Date: Thu, 7 Mar 2024 12:53:26 +0100 Subject: [PATCH 025/158] [clang][dataflow] Fix u8 string error with C++20. (#84302) See also discussion on https://github.com/llvm/llvm-project/pull/84291. --- clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp index ff4e18de2c70f1..6afd66d9dc6ac5 100644 --- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -500,7 +500,7 @@ class HTMLLogger : public Logger { for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { std::string Name = blockID(I); // Rightwards arrow, vertical line - char ConvergenceMarker[] = u8"\\n\u2192\u007c"; + const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; if (BlockConverged[I]) Name += ConvergenceMarker; GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" From d5aecf0c19fc8850d7d34ac8c339bcc7e133b5fb Mon Sep 17 00:00:00 2001 From: martinboehme Date: Thu, 7 Mar 2024 13:31:23 +0100 Subject: [PATCH 026/158] [clang][nullability] Don't discard expression state before end of full-expression. (#82611) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In https://github.com/llvm/llvm-project/pull/72985, I made a change to discard expression state (`ExprToLoc` and `ExprToVal`) at the beginning of each basic block. 
I did so with the claim that "we never need to access entries from these maps outside of the current basic block", noting that there are exceptions to this claim when control flow happens inside a full-expression (the operands of `&&`, `||`, and the conditional operator live in different basic blocks than the operator itself) but that we already have a mechanism for retrieving the values of these operands from the environment for the block they are computed in. It turns out, however, that the operands of these operators aren't the only expressions whose values can be accessed from a different basic block; when control flow happens within a full-expression, that control flow can be "interposed" between an expression and its parent. Here is an example: ```cxx void f(int*, int); bool cond(); void target() { int i = 0; f(&i, cond() ? 1 : 0); } ``` ([godbolt](https://godbolt.org/z/hrbj1Mj3o)) In the CFG[^1] , note how the expression for `&i` is computed in block B4, but the parent of this expression (the `CallExpr`) is located in block B1. The argument expression `&i` and the `CallExpr` are essentially "torn apart" into different basic blocks by the conditional operator in the second argument. In other words, the edge between the `CallExpr` and its argument `&i` straddles the boundary between two blocks. I used to think that this scenario -- where an edge between an expression and one of its children straddles a block boundary -- could only happen between the expression that triggers the control flow (`&&`, `||`, or the conditional operator) and its children, but the example above shows that other expressions can be affected as well; the control flow is still triggered by `&&`, `||` or the conditional operator, but the expressions affected lie outside these operators. Discarding expression state too soon is harmful. For example, an analysis that checks the arguments of the `CallExpr` above would not be able to retrieve a value for the `&i` argument. 
This patch therefore ensures that we don't discard expression state before the end of a full-expression. In other cases -- when the evaluation of a full-expression is complete -- we still want to discard expression state for the reasons explained in https://github.com/llvm/llvm-project/pull/72985 (avoid performing joins on boolean values that are no longer needed, which unnecessarily extends the flow condition; improve debuggability by removing clutter from the expression state). The impact on performance from this change is about a 1% slowdown in the Crubit nullability check benchmarks: ``` name old cpu/op new cpu/op delta BM_PointerAnalysisCopyPointer 71.9µs ± 1% 71.9µs ± 2% ~ (p=0.987 n=15+20) BM_PointerAnalysisIntLoop 190µs ± 1% 192µs ± 2% +1.06% (p=0.000 n=14+16) BM_PointerAnalysisPointerLoop 325µs ± 5% 324µs ± 4% ~ (p=0.496 n=18+20) BM_PointerAnalysisBranch 193µs ± 0% 192µs ± 4% ~ (p=0.488 n=14+18) BM_PointerAnalysisLoopAndBranch 521µs ± 1% 525µs ± 3% +0.94% (p=0.017 n=18+19) BM_PointerAnalysisTwoLoops 337µs ± 1% 341µs ± 3% +1.19% (p=0.004 n=17+19) BM_PointerAnalysisJoinFilePath 1.62ms ± 2% 1.64ms ± 3% +0.92% (p=0.021 n=20+20) BM_PointerAnalysisCallInLoop 1.14ms ± 1% 1.15ms ± 4% ~ (p=0.135 n=16+18) ``` [^1]: ``` [B5 (ENTRY)] Succs (1): B4 [B1] 1: [B4.9] ? [B2.1] : [B3.1] 2: [B4.4]([B4.6], [B1.1]) Preds (2): B2 B3 Succs (1): B0 [B2] 1: 1 Preds (1): B4 Succs (1): B1 [B3] 1: 0 Preds (1): B4 Succs (1): B1 [B4] 1: 0 2: int i = 0; 3: f 4: [B4.3] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(int *, int)) 5: i 6: &[B4.5] 7: cond 8: [B4.7] (ImplicitCastExpr, FunctionToPointerDecay, _Bool (*)(void)) 9: [B4.8]() T: [B4.9] ? ... : ... 
Preds (1): B5 Succs (2): B2 B3 [B0 (EXIT)] Preds (1): B1 ``` --- .../FlowSensitive/ControlFlowContext.h | 25 ++++- .../FlowSensitive/DataflowEnvironment.h | 11 +- .../FlowSensitive/ControlFlowContext.cpp | 38 ++++++- .../FlowSensitive/DataflowEnvironment.cpp | 28 ++++- .../TypeErasedDataflowAnalysis.cpp | 32 ++++-- .../FlowSensitive/DataflowEnvironmentTest.cpp | 3 +- .../TypeErasedDataflowAnalysisTest.cpp | 104 ++++++++++++------ 7 files changed, 189 insertions(+), 52 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h index 405e93287a05d3..9a0a00f3c01343 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -58,19 +58,36 @@ class ControlFlowContext { return BlockReachable[B.getBlockID()]; } + /// Returns whether `B` contains an expression that is consumed in a + /// different block than `B` (i.e. the parent of the expression is in a + /// different block). + /// This happens if there is control flow within a full-expression (triggered + /// by `&&`, `||`, or the conditional operator). Note that the operands of + /// these operators are not the only expressions that can be consumed in a + /// different block. For example, in the function call + /// `f(&i, cond() ? 1 : 0)`, `&i` is in a different block than the `CallExpr`. 
+ bool containsExprConsumedInDifferentBlock(const CFGBlock &B) const { + return ContainsExprConsumedInDifferentBlock.contains(&B); + } + private: - ControlFlowContext(const Decl &D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock, - llvm::BitVector BlockReachable) + ControlFlowContext( + const Decl &D, std::unique_ptr Cfg, + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable, + llvm::DenseSet ContainsExprConsumedInDifferentBlock) : ContainingDecl(D), Cfg(std::move(Cfg)), StmtToBlock(std::move(StmtToBlock)), - BlockReachable(std::move(BlockReachable)) {} + BlockReachable(std::move(BlockReachable)), + ContainsExprConsumedInDifferentBlock( + std::move(ContainsExprConsumedInDifferentBlock)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl &ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; llvm::BitVector BlockReachable; + llvm::DenseSet ContainsExprConsumedInDifferentBlock; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 62e7af7ac219bc..e8f009ef6c7913 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -210,6 +210,14 @@ class Environment { bool equivalentTo(const Environment &Other, Environment::ValueModel &Model) const; + /// How to treat expression state (`ExprToLoc` and `ExprToVal`) in a join. + /// If the join happens within a full expression, expression state should be + /// kept; otherwise, we can discard it. + enum ExprJoinBehavior { + DiscardExprState, + KeepExprState, + }; + /// Joins two environments by taking the intersection of storage locations and /// values that are stored in them. Distinct values that are assigned to the /// same storage locations in `EnvA` and `EnvB` are merged using `Model`. 
@@ -218,7 +226,8 @@ class Environment { /// /// `EnvA` and `EnvB` must use the same `DataflowAnalysisContext`. static Environment join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model); + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior); /// Widens the environment point-wise, using `PrevEnv` as needed to inform the /// approximation. diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 8aed19544be6a2..7c9f8fbb0a7009 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -94,6 +94,38 @@ static llvm::BitVector findReachableBlocks(const CFG &Cfg) { return BlockReachable; } +static llvm::DenseSet +buildContainsExprConsumedInDifferentBlock( + const CFG &Cfg, + const llvm::DenseMap &StmtToBlock) { + llvm::DenseSet Result; + + auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S, + const CFGBlock *Block) { + for (const Stmt *Child : S->children()) { + if (!isa(Child)) + continue; + const CFGBlock *ChildBlock = StmtToBlock.lookup(Child); + if (ChildBlock != Block) + Result.insert(ChildBlock); + } + }; + + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) + if (auto S = Element.getAs()) + CheckChildExprs(S->getStmt(), Block); + + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + CheckChildExprs(TerminatorCond, Block); + } + + return Result; +} + llvm::Expected ControlFlowContext::build(const FunctionDecl &Func) { if (!Func.doesThisDeclarationHaveABody()) @@ -140,8 +172,12 @@ ControlFlowContext::build(const Decl &D, Stmt &S, ASTContext &C) { llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + llvm::DenseSet ContainsExprConsumedInDifferentBlock = + buildContainsExprConsumedInDifferentBlock(*Cfg, StmtToBlock); + return ControlFlowContext(D, std::move(Cfg), 
std::move(StmtToBlock), - std::move(BlockReachable)); + std::move(BlockReachable), + std::move(ContainsExprConsumedInDifferentBlock)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index fd7b06efcc7861..62332a18c44a4a 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -48,6 +48,24 @@ static llvm::DenseMap intersectDeclToLoc( return Result; } +// Performs a join on either `ExprToLoc` or `ExprToVal`. +// The maps must be consistent in the sense that any entries for the same +// expression must map to the same location / value. This is the case if we are +// performing a join for control flow within a full-expression (which is the +// only case when this function should be used). +template MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { + MapT Result = Map1; + + for (const auto &Entry : Map2) { + [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry); + // If there was an existing entry, its value should be the same as for the + // entry we were trying to insert. + assert(It->second == Entry.second); + } + + return Result; +} + // Whether to consider equivalent two values with an unknown relation. 
// // FIXME: this function is a hack enabling unsoundness to support @@ -627,7 +645,8 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, } Environment Environment::join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model) { + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); @@ -675,9 +694,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, JoinedEnv.LocToVal = joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); - // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal` - // empty, as we never need to access entries in these maps outside of the - // basic block that sets them. + if (ExprBehavior == KeepExprState) { + JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); + JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); + } return JoinedEnv; } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 4c88c46142d64d..a9f39e153d0ce1 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -221,6 +221,7 @@ class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { // Avoids unneccesary copies of the environment. 
class JoinedStateBuilder { AnalysisContext &AC; + Environment::ExprJoinBehavior JoinBehavior; std::vector All; std::deque Owned; @@ -228,11 +229,13 @@ class JoinedStateBuilder { join(const TypeErasedDataflowAnalysisState &L, const TypeErasedDataflowAnalysisState &R) { return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), - Environment::join(L.Env, R.Env, AC.Analysis)}; + Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)}; } public: - JoinedStateBuilder(AnalysisContext &AC) : AC(AC) {} + JoinedStateBuilder(AnalysisContext &AC, + Environment::ExprJoinBehavior JoinBehavior) + : AC(AC), JoinBehavior(JoinBehavior) {} void addOwned(TypeErasedDataflowAnalysisState State) { Owned.push_back(std::move(State)); @@ -248,12 +251,12 @@ class JoinedStateBuilder { // initialize the state of each basic block differently. return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; if (All.size() == 1) - // Join the environment with itself so that we discard the entries from - // `ExprToLoc` and `ExprToVal`. + // Join the environment with itself so that we discard expression state if + // desired. // FIXME: We could consider writing special-case code for this that only // does the discarding, but it's not clear if this is worth it. - return {All[0]->Lattice, - Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)}; + return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env, + AC.Analysis, JoinBehavior)}; auto Result = join(*All[0], *All[1]); for (unsigned I = 2; I < All.size(); ++I) @@ -307,7 +310,22 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { } } - JoinedStateBuilder Builder(AC); + // If any of the predecessor blocks contains an expression consumed in a + // different block, we need to keep expression state. + // Note that in this case, we keep expression state for all predecessors, + // rather than only those predecessors that actually contain an expression + // consumed in a different block. 
While this is potentially suboptimal, it's + // actually likely, if we have control flow within a full expression, that + // all predecessors have expression state consumed in a different block. + Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState; + for (const CFGBlock *Pred : Preds) { + if (Pred && AC.CFCtx.containsExprConsumedInDifferentBlock(*Pred)) { + JoinBehavior = Environment::KeepExprState; + break; + } + } + + JoinedStateBuilder Builder(AC, JoinBehavior); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index 8799d03dfd3c58..465a8e21690c4a 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -190,7 +190,8 @@ TEST_F(EnvironmentTest, JoinRecords) { Env2.setValue(Loc, Val2); Environment::ValueModel Model; - Environment EnvJoined = Environment::join(Env1, Env2, Model); + Environment EnvJoined = + Environment::join(Env1, Env2, Model, Environment::DiscardExprState); auto *JoinedVal = cast(EnvJoined.getValue(Loc)); EXPECT_NE(JoinedVal, &Val1); EXPECT_NE(JoinedVal, &Val2); diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 34f9b0b23719fe..9d05a0d6ca4010 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -244,15 +244,17 @@ TEST_F(DiscardExprStateTest, WhileStatement) { EXPECT_NE(NotEqOpState.Env.getValue(NotEqOp), nullptr); // In the block that calls `foo(p)`, the value for `p != nullptr` is discarded - // because it is not consumed by this block. 
+ // because it is not consumed outside the block it is in. const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo); EXPECT_EQ(CallFooState.Env.getValue(NotEqOp), nullptr); } TEST_F(DiscardExprStateTest, BooleanOperator) { std::string Code = R"( - bool target(bool b1, bool b2) { - return b1 && b2; + void f(); + void target(bool b1, bool b2) { + if (b1 && b2) + f(); } )"; auto BlockStates = llvm::cantFail(runAnalysis( @@ -260,46 +262,80 @@ TEST_F(DiscardExprStateTest, BooleanOperator) { const auto &AndOp = matchNode(binaryOperator(hasOperatorName("&&"))); - const auto &Return = matchNode(returnStmt()); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); // In the block that evaluates the LHS of the `&&` operator, the LHS is // associated with a value, while the right-hand side is not (unsurprisingly, // as it hasn't been evaluated yet). const auto &LHSState = blockStateForStmt(BlockStates, *AndOp.getLHS()); auto *LHSValue = cast(LHSState.Env.getValue(*AndOp.getLHS())); - ASSERT_NE(LHSValue, nullptr); + EXPECT_NE(LHSValue, nullptr); EXPECT_EQ(LHSState.Env.getValue(*AndOp.getRHS()), nullptr); - // In the block that evaluates the RHS, the RHS is associated with a - // value. The value for the LHS has been discarded as it is not consumed by - // this block. + // In the block that evaluates the RHS, both the LHS and RHS are associated + // with values, as they are both subexpressions of the `&&` operator, which + // is evaluated in a later block. const auto &RHSState = blockStateForStmt(BlockStates, *AndOp.getRHS()); - EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), nullptr); - auto *RHSValue = cast(RHSState.Env.getValue(*AndOp.getRHS())); - ASSERT_NE(RHSValue, nullptr); - - // In the block that evaluates the return statement, the expression `b1 && b2` - // is associated with a value (and check that it's the right one). 
- // The expressions `b1` and `b2` are _not_ associated with a value in this - // block, even though they are consumed by the block, because: - // * This block has two prececessor blocks (the one that evaluates `b1` and - // the one that evaluates `b2`). - // * `b1` is only associated with a value in the block that evaluates `b1` but - // not the block that evalutes `b2`, so the join operation discards the - // value for `b1`. - // * `b2` is only associated with a value in the block that evaluates `b2` but - // not the block that evaluates `b1`, the the join operation discards the - // value for `b2`. - // Nevertheless, the analysis generates the correct formula for `b1 && b2` - // because the transfer function for the `&&` operator retrieves the values - // for its operands from the environments for the blocks that compute the - // operands, rather than from the environment for the block that contains the - // `&&`. - const auto &ReturnState = blockStateForStmt(BlockStates, Return); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getLHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getRHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(AndOp), - &ReturnState.Env.makeAnd(*LHSValue, *RHSValue)); + EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), LHSValue); + auto *RHSValue = RHSState.Env.get(*AndOp.getRHS()); + EXPECT_NE(RHSValue, nullptr); + + // In the block that evaluates `b1 && b2`, the `&&` as well as its operands + // are associated with values. + const auto &AndOpState = blockStateForStmt(BlockStates, AndOp); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getLHS()), LHSValue); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getRHS()), RHSValue); + EXPECT_EQ(AndOpState.Env.getValue(AndOp), + &AndOpState.Env.makeAnd(*LHSValue, *RHSValue)); + + // In the block that calls `f()`, none of `b1`, `b2`, or `b1 && b2` should be + // associated with values. 
+ const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getLHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getRHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(AndOp), nullptr); +} + +TEST_F(DiscardExprStateTest, ConditionalOperator) { + std::string Code = R"( + void f(int*, int); + void g(); + bool cond(); + + void target() { + int i = 0; + if (cond()) + f(&i, cond() ? 1 : 0); + g(); + } + )"; + auto BlockStates = llvm::cantFail(runAnalysis( + Code, [](ASTContext &C) { return NoopAnalysis(C); })); + + const auto &AddrOfI = + matchNode(unaryOperator(hasOperatorName("&"))); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); + const auto &CallG = + matchNode(callExpr(callee(functionDecl(hasName("g"))))); + + // In the block that evaluates `&i`, it should obviously have a value. + const auto &AddrOfIState = blockStateForStmt(BlockStates, AddrOfI); + auto *AddrOfIVal = AddrOfIState.Env.get(AddrOfI); + EXPECT_NE(AddrOfIVal, nullptr); + + // Because of the conditional operator, the `f(...)` call is evaluated in a + // different block than `&i`, but `&i` still needs to have a value here + // because it's a subexpression of the call. + const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_NE(&CallFState, &AddrOfIState); + EXPECT_EQ(CallFState.Env.get(AddrOfI), AddrOfIVal); + + // In the block that calls `g()`, `&i` should no longer be associated with a + // value. 
+ const auto &CallGState = blockStateForStmt(BlockStates, CallG); + EXPECT_EQ(CallGState.Env.get(AddrOfI), nullptr); } struct NonConvergingLattice { From 59e405b39416c8a5e2af93b2bfaa97a8c9d67f06 Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Thu, 7 Mar 2024 13:46:31 +0100 Subject: [PATCH 027/158] [flang][OpenMP] Add `%flang_fc1` `RUN` to delayed privatization tests (#84296) I did not know how `-mmlir` flag works and was deferring the addition of `--openm-enabled-delayed-privatization` until later because I thought some work needs to be done to do that. This commit just adds some extra `RUN` lines to delayed privatization tests to run them from `flang` as well. --- .../Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 | 5 ++++- .../test/Lower/OpenMP/FIR/delayed-privatization-private.f90 | 5 ++++- .../test/Lower/OpenMP/delayed-privatization-firstprivate.f90 | 5 ++++- .../OpenMP/delayed-privatization-private-firstprivate.f90 | 5 ++++- flang/test/Lower/OpenMP/delayed-privatization-private.f90 | 5 ++++- flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 | 5 ++++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 index 122542345f104b..50938342dee7c2 100644 --- a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir \ +! RUN: --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization \ +! 
RUN: -o - %s 2>&1 | FileCheck %s subroutine delayed_privatization_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 index 2e9995ea1fd4c4..b13687faa3f26d 100644 --- a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir \ +! RUN: --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s subroutine delayed_privatization_private implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 index e3d2a5a8af2608..0fb81d68016a48 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `firstprivate` clause. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! 
RUN: | FileCheck %s subroutine delayed_privatization_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 index 46eef6eb3bcf6a..337e7d5ec885cb 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for both `private` and `firstprivate` clauses. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! RUN: | FileCheck %s subroutine delayed_privatization_private_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 index 240e0e71bfcd16..7208521bcd77e4 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! 
RUN: | FileCheck %s subroutine delayed_privatization_private implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 index c61f352b9b055a..a7eeb1faceadef 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 @@ -3,7 +3,10 @@ ! that the block arguments are added in the proper order (reductions first and ! then delayed privatization. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! RUN: | FileCheck %s subroutine red_and_delayed_private integer :: red From 464d9d96b3565ead06396ffb8d02b4dcf9cb9556 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Thu, 7 Mar 2024 13:05:04 +0000 Subject: [PATCH 028/158] [RemoveDIs][DebugInfo][IR] Add parsing for non-intrinsic debug values (#79818) This patch adds support for parsing the proposed non-instruction debug info ("RemoveDIs") from textual IR, and adds a test for the parser as well as a set of verifier tests that are dependent on parsing to fire. 
An important detail of this patch is the fact that although we can now parse in the RemoveDIs (new) and Intrinsic (old) debug info formats, we will always convert back to the old format at the end of parsing - this is done for two reasons: firstly to ensure that every tool is able to process IR printed in the new format, regardless of whether that tool has had RemoveDIs support added, and secondly to maintain the effect of the existing flags: for the tools where support for the new format has been added, we will run LLVM passes in the new format iff `--try-experimental-debuginfo-iterators=true`, and we will print in the new format iff `--write-experimental-debuginfo-iterators=true`; the format of the textual IR input should have no effect on either of these features. --- llvm/include/llvm/AsmParser/LLParser.h | 4 + llvm/include/llvm/AsmParser/LLToken.h | 2 + .../include/llvm/IR/DebugProgramInstruction.h | 33 ++++ llvm/lib/AsmParser/LLLexer.cpp | 20 ++- llvm/lib/AsmParser/LLParser.cpp | 168 +++++++++++++++++- llvm/lib/IR/DebugProgramInstruction.cpp | 27 +++ llvm/lib/IR/Verifier.cpp | 2 +- llvm/test/Assembler/dbg-record-invalid-0.ll | 38 ++++ llvm/test/Assembler/dbg-record-invalid-1.ll | 39 ++++ llvm/test/Assembler/dbg-record-invalid-2.ll | 36 ++++ llvm/test/Assembler/dbg-record-invalid-3.ll | 39 ++++ llvm/test/Assembler/dbg-record-invalid-4.ll | 36 ++++ llvm/test/Assembler/dbg-record-invalid-5.ll | 35 ++++ llvm/test/Assembler/dbg-record-invalid-6.ll | 36 ++++ llvm/test/Assembler/dbg-record-invalid-7.ll | 36 ++++ llvm/test/Assembler/dbg-record-invalid-8.ll | 36 ++++ .../roundtrip-non-instruction-debug-info.ll | 94 ++++++++++ llvm/test/Verifier/RemoveDI/blockbyref.ll | 18 ++ .../Verifier/RemoveDI/dbg-invalid-vector.ll | 35 ++++ .../RemoveDI/di-subroutine-localvar.ll | 41 +++++ .../diexpression-entry-value-llvm-ir.ll | 34 ++++ .../test/Verifier/RemoveDI/fnarg-debuginfo.ll | 26 +++ llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll | 58 ++++++ 
.../RemoveDI/invalid-disubrange-count-node.ll | 36 ++++ .../RemoveDI/llvm.dbg.declare-address.ll | 16 ++ .../RemoveDI/llvm.dbg.declare-expression.ll | 16 ++ .../RemoveDI/llvm.dbg.declare-variable.ll | 17 ++ .../llvm.dbg.intrinsic-dbg-attachment.ll | 55 ++++++ .../RemoveDI/llvm.dbg.value-expression.ll | 16 ++ .../Verifier/RemoveDI/llvm.dbg.value-value.ll | 17 ++ .../RemoveDI/llvm.dbg.value-variable.ll | 17 ++ llvm/test/Verifier/RemoveDI/set1.ll | 62 +++++++ 32 files changed, 1141 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Assembler/dbg-record-invalid-0.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-1.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-2.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-3.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-4.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-5.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-6.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-7.ll create mode 100644 llvm/test/Assembler/dbg-record-invalid-8.ll create mode 100644 llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll create mode 100644 llvm/test/Verifier/RemoveDI/blockbyref.ll create mode 100644 llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll create mode 100644 llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll create mode 100644 llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll create mode 100644 llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll create mode 100644 llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll create mode 100644 llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll create mode 100644 
llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll create mode 100644 llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll create mode 100644 llvm/test/Verifier/RemoveDI/set1.ll diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index f07f4c61f9d649..e5e1ade8b38b36 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -178,6 +178,9 @@ namespace llvm { /// UpgradeDebuginfo so it can generate broken bitcode. bool UpgradeDebugInfo; + bool SeenNewDbgInfoFormat = false; + bool SeenOldDbgInfoFormat = false; + std::string SourceFileName; public: @@ -573,6 +576,7 @@ namespace llvm { bool parseMDNodeTail(MDNode *&N); bool parseMDNodeVector(SmallVectorImpl &Elts); bool parseMetadataAttachment(unsigned &Kind, MDNode *&MD); + bool parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS); bool parseInstructionMetadata(Instruction &Inst); bool parseGlobalObjectMetadataAttachment(GlobalObject &GO); bool parseOptionalFunctionMetadata(Function &F); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 3c34706ee03e82..5863a8d6e8ee84 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -36,6 +36,7 @@ enum Kind { exclaim, // ! bar, // | colon, // : + hash, // # kw_vscale, kw_x, @@ -479,6 +480,7 @@ enum Kind { DISPFlag, // DISPFlagFoo DwarfMacinfo, // DW_MACINFO_foo ChecksumKind, // CSK_foo + DbgRecordType, // dbg_foo // Type valued tokens (TyVal). 
Type, diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h index cf30b4d0b0aaf0..a8faf415a3ea87 100644 --- a/llvm/include/llvm/IR/DebugProgramInstruction.h +++ b/llvm/include/llvm/IR/DebugProgramInstruction.h @@ -223,9 +223,19 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DbgRecord &R) { class DPLabel : public DbgRecord { DbgRecordParamRef Label; + /// This constructor intentionally left private, so that it is only called via + /// "createUnresolvedDPLabel", which clearly expresses that it is for parsing + /// only. + DPLabel(MDNode *Label, MDNode *DL); + public: DPLabel(DILabel *Label, DebugLoc DL); + /// For use during parsing; creates a DPLabel from as-of-yet unresolved + /// MDNodes. Trying to access the resulting DPLabel's fields before they are + /// resolved, or if they resolve to the wrong type, will result in a crash. + static DPLabel *createUnresolvedDPLabel(MDNode *Label, MDNode *DL); + DPLabel *clone() const; void print(raw_ostream &O, bool IsForDebug = false) const; void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const; @@ -286,6 +296,29 @@ class DPValue : public DbgRecord, protected DebugValueUser { DIAssignID *AssignID, Metadata *Address, DIExpression *AddressExpression, const DILocation *DI); +private: + /// Private constructor for creating new instances during parsing only. Only + /// called through `createUnresolvedDPValue` below, which makes clear that + /// this is used for parsing only, and will later return a subclass depending + /// on which Type is passed. + DPValue(LocationType Type, Metadata *Val, MDNode *Variable, + MDNode *Expression, MDNode *AssignID, Metadata *Address, + MDNode *AddressExpression, MDNode *DI); + +public: + /// Used to create DPValues during parsing, where some metadata references may + /// still be unresolved. 
Although for some fields a generic `Metadata*` + /// argument is accepted for forward type-references, the verifier and + /// accessors will reject incorrect types later on. The function is used for + /// all types of DPValues for simplicity while parsing, but asserts if any + /// necessary fields are empty or unused fields are not empty, i.e. if the + /// #dbg_assign fields are used for a non-dbg-assign type. + static DPValue *createUnresolvedDPValue(LocationType Type, Metadata *Val, + MDNode *Variable, MDNode *Expression, + MDNode *AssignID, Metadata *Address, + MDNode *AddressExpression, + MDNode *DI); + static DPValue *createDPVAssign(Value *Val, DILocalVariable *Variable, DIExpression *Expression, DIAssignID *AssignID, Value *Address, diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 5d8a50eee13068..02f64fcfac4f0c 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -438,9 +438,12 @@ lltok::Kind LLLexer::LexCaret() { /// Lex all tokens that start with a # character. /// AttrGrpID ::= #[0-9]+ +/// Hash ::= # lltok::Kind LLLexer::LexHash() { // Handle AttrGrpID: #[0-9]+ - return LexUIntID(lltok::AttrGrpID); + if (isdigit(static_cast(CurPtr[0]))) + return LexUIntID(lltok::AttrGrpID); + return lltok::hash; } /// Lex a label, integer type, keyword, or hexadecimal integer constant. @@ -923,6 +926,21 @@ lltok::Kind LLLexer::LexIdentifier() { #undef DWKEYWORD +// Keywords for debug record types. 
+#define DBGRECORDTYPEKEYWORD(STR) \ + do { \ + if (Keyword == "dbg_" #STR) { \ + StrVal = #STR; \ + return lltok::DbgRecordType; \ + } \ + } while (false) + + DBGRECORDTYPEKEYWORD(value); + DBGRECORDTYPEKEYWORD(declare); + DBGRECORDTYPEKEYWORD(assign); + DBGRECORDTYPEKEYWORD(label); +#undef DBGRECORDTYPEKEYWORD + if (Keyword.starts_with("DIFlag")) { StrVal.assign(Keyword.begin(), Keyword.end()); return lltok::DIFlag; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index a91e2f690999e0..e140c94195205a 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -62,6 +62,8 @@ static cl::opt AllowIncompleteIR( "Allow incomplete IR on a best effort basis (references to unknown " "metadata will be dropped)")); +extern llvm::cl::opt UseNewDbgInfoFormat; + static std::string getTypeString(Type *T) { std::string Result; raw_string_ostream Tmp(Result); @@ -69,6 +71,15 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } +// Currently, we should always process modules in the old debug info format by +// default regardless of the module's format in IR; convert it to the old format +// here. +bool finalizeDebugInfoFormat(Module *M) { + if (M) + M->setIsNewDbgInfoFormat(false); + return false; +} + /// Run: module ::= toplevelentity* bool LLParser::Run(bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback) { @@ -86,7 +97,7 @@ bool LLParser::Run(bool UpgradeDebugInfo, } return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) || - validateEndOfIndex(); + validateEndOfIndex() || finalizeDebugInfoFormat(M); } bool LLParser::parseStandaloneConstantValue(Constant *&C, @@ -6041,6 +6052,17 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool isOldDbgFormatIntrinsic(StringRef Name) { + // Exit early for the common (non-debug-intrinsic) case. 
+ // We can make this the only check when we begin supporting all "llvm.dbg" + // intrinsics in the new debug info format. + if (!Name.starts_with("llvm.dbg.")) + return false; + Intrinsic::ID FnID = Function::lookupIntrinsicID(Name); + return FnID == Intrinsic::dbg_declare || FnID == Intrinsic::dbg_value || + FnID == Intrinsic::dbg_assign; +} + /// FunctionHeader /// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility /// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName @@ -6390,9 +6412,31 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { std::string NameStr; - // parse the instructions in this block until we get a terminator. + // Parse the instructions and debug values in this block until we get a + // terminator. Instruction *Inst; + auto DeleteDbgRecord = [](DbgRecord *DR) { DR->deleteRecord(); }; + using DbgRecordPtr = std::unique_ptr; + SmallVector TrailingDbgRecord; do { + // Handle debug records first - there should always be an instruction + // following the debug records, i.e. they cannot appear after the block + // terminator. + while (Lex.getKind() == lltok::hash) { + if (SeenOldDbgInfoFormat) + return error(Lex.getLoc(), "debug record should not appear in a module " + "containing debug info intrinsics"); + SeenNewDbgInfoFormat = true; + Lex.Lex(); + if (!M->IsNewDbgInfoFormat) + M->convertToNewDbgValues(); + + DbgRecord *DR; + if (parseDebugRecord(DR, PFS)) + return true; + TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord); + } + // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". LocTy NameLoc = Lex.getLoc(); @@ -6437,11 +6481,121 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { // Set the name on the instruction. if (PFS.setInstName(NameID, NameStr, NameLoc, Inst)) return true; + + // Attach any preceding debug values to this instruction. 
+ for (DbgRecordPtr &DR : TrailingDbgRecord) + BB->insertDPValueBefore(DR.release(), Inst->getIterator()); + TrailingDbgRecord.clear(); } while (!Inst->isTerminator()); + assert(TrailingDbgRecord.empty() && + "All debug values should have been attached to an instruction."); + return false; } +/// parseDebugRecord +/// ::= #dbg_label '(' MDNode ')' +/// ::= #dbg_type '(' Metadata ',' MDNode ',' Metadata ',' +/// (MDNode ',' Metadata ',' Metadata ',')? MDNode ')' +bool LLParser::parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS) { + using RecordKind = DbgRecord::Kind; + using LocType = DPValue::LocationType; + LocTy DPVLoc = Lex.getLoc(); + if (Lex.getKind() != lltok::DbgRecordType) + return error(DPVLoc, "expected debug record type here"); + RecordKind RecordType = StringSwitch(Lex.getStrVal()) + .Case("declare", RecordKind::ValueKind) + .Case("value", RecordKind::ValueKind) + .Case("assign", RecordKind::ValueKind) + .Case("label", RecordKind::LabelKind); + + // Parsing labels is trivial; parse here and early exit, otherwise go into the + // full DPValue processing stage. + if (RecordType == RecordKind::LabelKind) { + Lex.Lex(); + if (parseToken(lltok::lparen, "Expected '(' here")) + return true; + MDNode *Label; + if (parseMDNode(Label)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + MDNode *DbgLoc; + if (parseMDNode(DbgLoc)) + return true; + if (parseToken(lltok::rparen, "Expected ')' here")) + return true; + DR = DPLabel::createUnresolvedDPLabel(Label, DbgLoc); + return false; + } + + LocType ValueType = StringSwitch(Lex.getStrVal()) + .Case("declare", LocType::Declare) + .Case("value", LocType::Value) + .Case("assign", LocType::Assign); + + Lex.Lex(); + if (parseToken(lltok::lparen, "Expected '(' here")) + return true; + + // Parse Value field. + Metadata *ValLocMD; + if (parseMetadata(ValLocMD, &PFS)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse Variable field. 
+ MDNode *Variable; + if (parseMDNode(Variable)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse Expression field. + MDNode *Expression; + if (parseMDNode(Expression)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse additional fields for #dbg_assign. + MDNode *AssignID = nullptr; + Metadata *AddressLocation = nullptr; + MDNode *AddressExpression = nullptr; + if (ValueType == LocType::Assign) { + // Parse DIAssignID. + if (parseMDNode(AssignID)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse address ValueAsMetadata. + if (parseMetadata(AddressLocation, &PFS)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse address DIExpression. + if (parseMDNode(AddressExpression)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + } + + /// Parse DILocation. + MDNode *DebugLoc; + if (parseMDNode(DebugLoc)) + return true; + + if (parseToken(lltok::rparen, "Expected ')' here")) + return true; + DR = DPValue::createUnresolvedDPValue(ValueType, ValLocMD, Variable, + Expression, AssignID, AddressLocation, + AddressExpression, DebugLoc); + return false; +} //===----------------------------------------------------------------------===// // Instruction Parsing. 
 //===----------------------------------------------------------------------===//
@@ -7669,6 +7823,16 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
     }
     CI->setFastMathFlags(FMF);
   }
+
+  if (CalleeID.Kind == ValID::t_GlobalName &&
+      isOldDbgFormatIntrinsic(CalleeID.StrVal)) {
+    if (SeenNewDbgInfoFormat) {
+      CI->deleteValue();
+      return error(CallLoc, "llvm.dbg intrinsic should not appear in a module "
+                            "using non-intrinsic debug info");
+    }
+    SeenOldDbgInfoFormat = true;
+  }
   CI->setAttributes(PAL);
   ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
   Inst = CI;
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index a8d64024e1797b..5ff1e8c19db68b 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -138,11 +138,38 @@ DbgRecord::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const {
   llvm_unreachable("unsupported DbgRecord kind");
 }
 
+DPLabel::DPLabel(MDNode *Label, MDNode *DL)
+    : DbgRecord(LabelKind, DebugLoc(DL)), Label(Label) {
+  assert(Label && "Unexpected nullptr");
+  assert((isa<DILabel>(Label) || Label->isTemporary()) &&
+         "Label type must be or resolve to a DILabel");
+}
 DPLabel::DPLabel(DILabel *Label, DebugLoc DL)
     : DbgRecord(LabelKind, DL), Label(Label) {
   assert(Label && "Unexpected nullptr");
 }
 
+DPLabel *DPLabel::createUnresolvedDPLabel(MDNode *Label, MDNode *DL) {
+  return new DPLabel(Label, DL);
+}
+
+DPValue::DPValue(DPValue::LocationType Type, Metadata *Val, MDNode *Variable,
+                 MDNode *Expression, MDNode *AssignID, Metadata *Address,
+                 MDNode *AddressExpression, MDNode *DI)
+    : DbgRecord(ValueKind, DebugLoc(DI)),
+      DebugValueUser({Val, Address, AssignID}), Type(Type), Variable(Variable),
+      Expression(Expression), AddressExpression(AddressExpression) {}
+
+DPValue *DPValue::createUnresolvedDPValue(DPValue::LocationType Type,
+                                          Metadata *Val, MDNode *Variable,
+                                          MDNode *Expression, MDNode *AssignID,
+                                          Metadata *Address,
+                                          MDNode *AddressExpression,
+                                          MDNode *DI) {
+  return new DPValue(Type, Val, Variable, Expression, AssignID, Address,
+                     AddressExpression, DI);
+}
+
 DPValue *DPValue::createDPValue(Value *Location, DILocalVariable *DV,
                                 DIExpression *Expr, const DILocation *DI) {
   return new DPValue(ValueAsMetadata::get(Location), DV, Expr, DI,
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 74c7354e7bf1bb..fd5f7d57c258d4 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6291,7 +6291,7 @@ void Verifier::visit(DPValue &DPV) {
           Var->getRawType());
 
   auto *DLNode = DPV.getDebugLoc().getAsMDNode();
-  CheckDI(isa_and_nonnull<DILocation>(DLNode), "invalid #dbg record location",
+  CheckDI(isa_and_nonnull<DILocation>(DLNode), "invalid #dbg record DILocation",
           &DPV, DLNode);
 
   DILocation *Loc = DPV.getDebugLoc();
diff --git a/llvm/test/Assembler/dbg-record-invalid-0.ll b/llvm/test/Assembler/dbg-record-invalid-0.ll
new file mode 100644
index 00000000000000..feb513a405f9ec
--- /dev/null
+++ b/llvm/test/Assembler/dbg-record-invalid-0.ll
@@ -0,0 +1,38 @@
+;; Test that we get a parser error when a debug record appears post-terminator.
+;; Note: From the parser's perspective, the error is that the debug record is
+;; appearing at the start of a new unnamed basic block which contains no actual
+;; instructions.
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + ret i32 %a, !dbg !18 + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:1: error: expected instruction opcode +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-1.ll b/llvm/test/Assembler/dbg-record-invalid-1.ll new file mode 100644 index 00000000000000..7ab5751777e8cf --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-1.ll @@ -0,0 +1,39 @@ +;; Test that we get a parser error when a debug intrinsic appears in the same +;; module as a debug record. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:8: error: llvm.dbg intrinsic should not appear in a module using non-intrinsic debug info + call void @llvm.dbg.value(metadata i32 %a, metadata !12, metadata !DIExpression()), !dbg !14 + ret i32 %a, !dbg !18 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-2.ll b/llvm/test/Assembler/dbg-record-invalid-2.ll new file mode 100644 index 00000000000000..a019f73feab9c5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-2.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error 
when we have a debug record with an +;; incorrect number of arguments. +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:24: error: expected '!' here + #dbg_value(i32 %a, i32 0, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-3.ll b/llvm/test/Assembler/dbg-record-invalid-3.ll new file mode 100644 index 00000000000000..e6f072373f54d5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-3.ll @@ -0,0 +1,39 @@ +;; Test that we get a parser error when a debug record appears in the same +;; module as a debug intrinsic. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !12, metadata !DIExpression()), !dbg !14 +; CHECK: :[[@LINE+1]]:5: error: debug record should not appear in a module containing debug info intrinsics + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-4.ll b/llvm/test/Assembler/dbg-record-invalid-4.ll new file mode 100644 index 00000000000000..f898477603c8e2 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-4.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when 
we have a debug record with an invalid +;; type. +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:6: error: expected debug record type here + #dbg_invalid(!DIArgList(i32 %a), !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-5.ll b/llvm/test/Assembler/dbg-record-invalid-5.ll new file mode 100644 index 00000000000000..5ea588b87668c4 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-5.ll @@ -0,0 +1,35 @@ +;; Test that we get a parser error when a basic block contains only a debug +;; record. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:1: error: expected instruction opcode +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-6.ll b/llvm/test/Assembler/dbg-record-invalid-6.ll new file mode 100644 index 00000000000000..72dafcdb97fce4 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-6.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug record with an +;; incorrect number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:46: error: expected '!' here + #dbg_value(i32 %a, !12, !DIExpression(), i32 0) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-7.ll b/llvm/test/Assembler/dbg-record-invalid-7.ll new file mode 100644 index 00000000000000..036a85a2977fc7 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-7.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug record with an incorrect +;; number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:44: error: Expected ',' here + #dbg_value(i32 %a, !12, !DIExpression()) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-8.ll b/llvm/test/Assembler/dbg-record-invalid-8.ll new file mode 100644 index 00000000000000..d0b8f36d7895c5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-8.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug assign record with an +;; incorrect number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:50: error: Expected ',' here + #dbg_assign(i32 %a, !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll new file mode 100644 index 00000000000000..b15b76d1690c41 --- /dev/null +++ b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll @@ -0,0 +1,94 @@ +;; Test that we can write in the old debug info format. 
+; RUN: opt --passes=verify -S --write-experimental-debuginfo=false < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; Test that we can write in the new debug info format... +; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; ...and then read the new format and write the old format. +; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \ +; RUN: | opt --passes=verify -S --write-experimental-debuginfo=false \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; Test also that the new flag is independent of the flag that enables use of +;; these non-instruction debug info during LLVM passes. +; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=false < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg +; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=true < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +; CHECK: @f(i32 %[[VAL_A:[0-9a-zA-Z]+]]) +; CHECK-NEXT: entry: +; OLDDBG-NEXT: call void @llvm.dbg.value(metadata i32 %[[VAL_A]], metadata ![[VAR_A:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_1:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_value(i32 %[[VAL_A]], ![[VAR_A:[0-9]+]], !DIExpression(), ![[LOC_1:[0-9]+]]) +; CHECK-NEXT: {{^}} %[[VAL_B:[0-9a-zA-Z]+]] = alloca +; OLDDBG-NEXT: call void @llvm.dbg.declare(metadata ptr %[[VAL_B]], metadata ![[VAR_B:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_2:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_declare(ptr %[[VAL_B]], ![[VAR_B:[0-9]+]], !DIExpression(), ![[LOC_2:[0-9]+]]) +; CHECK-NEXT: {{^}} %[[VAL_ADD:[0-9a-zA-Z]+]] 
= add i32 %[[VAL_A]], 5 +; OLDDBG-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 %[[VAL_A]], i32 %[[VAL_ADD]]), metadata ![[VAR_A]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg ![[LOC_3:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_value(!DIArgList(i32 %[[VAL_A]], i32 %[[VAL_ADD]]), ![[VAR_A]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus), ![[LOC_3:[0-9]+]]) +; OLDDBG-NEXT: call void @llvm.dbg.label(metadata ![[LABEL_ID:[0-9]+]]), !dbg ![[LOC_3]] +; NEWDBG-NEXT: {{^}} #dbg_label(![[LABEL_ID:[0-9]+]], ![[LOC_3]]) +; CHECK-NEXT: {{^}} store i32 %[[VAL_ADD]]{{.+}}, !DIAssignID ![[ASSIGNID:[0-9]+]] +; OLDDBG-NEXT: call void @llvm.dbg.assign(metadata i32 %[[VAL_ADD]], metadata ![[VAR_B]], metadata !DIExpression(), metadata ![[ASSIGNID]], metadata ptr %[[VAL_B]], metadata !DIExpression()), !dbg ![[LOC_4:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_assign(i32 %[[VAL_ADD]], ![[VAR_B]], !DIExpression(), ![[ASSIGNID]], ptr %[[VAL_B]], !DIExpression(), ![[LOC_4:[0-9]+]]) +; CHECK-NEXT: {{^}} ret i32 + +; OLDDBG-DAG: declare void @llvm.dbg.value +; OLDDBG-DAG: declare void @llvm.dbg.declare +; OLDDBG-DAG: declare void @llvm.dbg.assign +; OLDDBG-DAG: declare void @llvm.dbg.label + +; CHECK-DAG: llvm.dbg.cu +; CHECK-DAG: ![[VAR_A]] = !DILocalVariable(name: "a" +; CHECK-DAG: ![[VAR_B]] = !DILocalVariable(name: "b" +; CHECK-DAG: ![[LOC_1]] = !DILocation(line: 3, column: 15 +; CHECK-DAG: ![[LOC_2]] = !DILocation(line: 3, column: 20 +; CHECK-DAG: ![[LOC_3]] = !DILocation(line: 3, column: 25 +; CHECK-DAG: ![[LOC_4]] = !DILocation(line: 3, column: 30 +; CHECK-DAG: ![[LABEL_ID]] = !DILabel( + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !20, metadata !DIExpression()), !dbg !30 + %b = alloca i32, !dbg !30, !DIAssignID !40 + call void @llvm.dbg.declare(metadata ptr %b, metadata !21, metadata !DIExpression()), !dbg !31 + %add = add i32 %a, 5, !dbg !31 + call void 
@llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %add), metadata !20, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg !32 + call void @llvm.dbg.label(metadata !50), !dbg !32 + store i32 %add, ptr %b, !dbg !32, !DIAssignID !40 + call void @llvm.dbg.assign(metadata i32 %add, metadata !21, metadata !DIExpression(), metadata !40, metadata ptr %b, metadata !DIExpression()), !dbg !33 + ret i32 %add, !dbg !33 + +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) +declare void @llvm.dbg.label(metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !13) +!8 = !DISubroutineType(types: !9) +!9 = !{!12, !12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!20, !21} +!20 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !12) +!21 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !12) +!30 = !DILocation(line: 3, column: 15, scope: !7) +!31 = !DILocation(line: 3, column: 20, scope: !7) +!32 = !DILocation(line: 3, column: 25, scope: !7) +!33 = !DILocation(line: 3, column: 30, scope: !7) +!40 = distinct !DIAssignID() +!50 = !DILabel(scope: !7, name: "label", file: !1, line: 3) \ No 
newline at end of file diff --git a/llvm/test/Verifier/RemoveDI/blockbyref.ll b/llvm/test/Verifier/RemoveDI/blockbyref.ll new file mode 100644 index 00000000000000..86321a6ae78e80 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/blockbyref.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as -disable-output <%s 2>&1| FileCheck %s + +; CHECK: DIBlockByRefStruct on DICompositeType is no longer supported +; CHECK: warning: ignoring invalid debug info + +define void @foo() { +entry: + %s = alloca i32 + #dbg_declare(ptr %s, !2, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() +!2 = !DILocalVariable(scope: !1, type: !3) +!3 = !DICompositeType(tag: DW_TAG_structure_type, flags: DIFlagReservedBit4) diff --git a/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll b/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll new file mode 100644 index 00000000000000..0832c361c3080e --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll @@ -0,0 +1,35 @@ +; RUN: opt -passes=verify -disable-output <%s 2>&1 | FileCheck %s +; +; This test creates an invalid vector by defining multiple elements for the +; vector's DICompositeType definition. A vector should only have one element +; in its DICompositeType 'elements' array. 
+; +; CHECK: invalid vector + +@f.foo = private unnamed_addr constant <6 x float> zeroinitializer, align 32 + +define void @f() { + %1 = alloca <6 x float>, align 32 + #dbg_declare(ptr %1, !10, !DIExpression(), !18) + ret void +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "/dbg/info") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "foo", scope: !7, file: !1, line: 4, type: !12) +!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 256, flags: DIFlagVector, elements: !14) +!13 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!14 = !{!15, !19} +!15 = !DISubrange(count: 6) +!18 = !DILocation(line: 4, column: 48, scope: !7) +!19 = !DISubrange(count: 42) diff --git a/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll b/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll new file mode 100644 index 00000000000000..14e58883989968 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll @@ -0,0 +1,41 @@ +; RUN: opt %s -passes=verify 2>&1 | FileCheck %s +; CHECK: invalid type +; CHECK: !20 = !DILocalVariable(name: "f", scope: !21, file: !13, line: 970, type: !14) +; CHECK: !14 = !DISubroutineType(types: !15) + + +%timespec.0.1.2.3.0.1.2 = type { i64, i64 } +define internal i64 @init_vdso_clock_gettime(i32, ptr nonnull) unnamed_addr !dbg !142 { + #dbg_value(ptr null, !162, !DIExpression(), !167) + ret i64 -38, !dbg !168 +} +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} +!0 = !{i32 
2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "zig 0.3.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !76) +!2 = !DIFile(filename: "test", directory: ".") +!3 = !{!4} +!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Arch", scope: !5, file: !5, line: 44, baseType: !6, size: 8, align: 8, elements: !7) +!5 = !DIFile(filename: "builtin.zig", directory: "/home/andy/.local/share/zig/stage1/builtin/ugMGxVES9OkDAffv3xhJS3KQVy0Wm1xPM3Bc6x4MBuup5aetdi5pVTrGRG2aDAn0") +!6 = !DIBasicType(name: "u7", size: 8, encoding: DW_ATE_unsigned) +!7 = !{!8} +!8 = !DIEnumerator(name: "armv8_5a", value: 0) +!76 = !{!77} +!77 = !DIGlobalVariableExpression(var: !78, expr: !DIExpression()) +!78 = distinct !DIGlobalVariable(name: "arch", linkageName: "arch", scope: !5, file: !5, line: 437, type: !4, isLocal: true, isDefinition: true) +!81 = !DIFile(filename: "index.zig", directory: "/store/dev/zig/build-llvm8-debug/lib/zig/std/os/linux") +!142 = distinct !DISubprogram(name: "init_vdso_clock_gettime", scope: !81, file: !81, line: 968, type: !143, scopeLine: 968, flags: DIFlagStaticMember, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, retainedNodes: !153) +!143 = !DISubroutineType(types: !144) +!144 = !{!145} +!145 = !DIBasicType(name: "usize", size: 64, encoding: DW_ATE_unsigned) +!146 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) +!153 = !{!154} +!154 = !DILocalVariable(name: "clk", arg: 1, scope: !142, file: !81, line: 968, type: !146) +!162 = !DILocalVariable(name: "f", scope: !163, file: !81, line: 970, type: !143) +!163 = distinct !DILexicalBlock(scope: !164, file: !81, line: 969, column: 5) +!164 = distinct !DILexicalBlock(scope: !165, file: !81, line: 968, column: 66) +!165 = distinct !DILexicalBlock(scope: !166, file: !81, line: 968, column: 45) +!166 = distinct !DILexicalBlock(scope: !142, file: !81, line: 968, column: 35) 
+!167 = !DILocation(line: 970, column: 5, scope: !163) +!168 = !DILocation(line: 972, column: 28, scope: !169) +!169 = distinct !DILexicalBlock(scope: !163, file: !81, line: 970, column: 5) diff --git a/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll b/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll new file mode 100644 index 00000000000000..881ec4a86fb644 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as -disable-output <%s 2>&1| FileCheck %s + +; CHECK-NOT: #dbg_value +; CHECK: Entry values are only allowed in MIR unless they target a swiftasync Argument +; CHECK: #dbg_value(i32 %param, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1) +; CHECK-NOT: #dbg_value +; CHECK-NOT: Entry values are only allowed +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %param, ptr swiftasync %ok_param) !dbg !4 { +entry: + #dbg_value(i32 %param, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr %ok_param, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr poison, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr undef, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + ret void +} + + +attributes #0 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "a.c", directory: "/") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, type: !5, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{null, !7} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !DILocalVariable(name: "param", arg: 1, scope: !4, file: !1, type: !7) +!9 = !DILocation(line: 0, scope: !4) diff --git a/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll 
b/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll new file mode 100644 index 00000000000000..db1a9a8ba18945 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as -disable-output < %s -o /dev/null 2>&1 | FileCheck %s + + +define void @foo() !dbg !2 { +entry: + %a = alloca i32 + ; CHECK: conflicting debug info for argument + #dbg_value(i32 0, !3, !DIExpression(), !6) + #dbg_declare(ptr %a, !4, !DIExpression(), !6) + ret void, !dbg !6 +} + +; CHECK: warning: ignoring invalid debug info + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", emissionKind: FullDebug) +!1 = !DIFile(filename: "x.c", directory: "/") +!2 = distinct !DISubprogram(name: "foo", scope: !0, isDefinition: true, unit: !0) +!3 = !DILocalVariable(name: "a", arg: 1, scope: !2, file: !1, line: 1, type: !5) +!4 = !DILocalVariable(name: "b", arg: 1, scope: !2, file: !1, line: 1, type: !5) +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!6 = !DILocation(line: 1, scope: !2) +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 1, !"Debug Info Version", i32 3} diff --git a/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll b/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll new file mode 100644 index 00000000000000..f5526030278eb9 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll @@ -0,0 +1,58 @@ +; RUN: llvm-as < %s -o %t +; RUN: llvm-dis < %t -o - | FileCheck %s +; Created at -O1 from: +; int sink(int); +; __attribute__((always_inline)) int f(int i) { return sink(i); } +; __attribute__((always_inline)) int g(int j) { return sink(j); } +; __attribute__((nodebug)) int nodebug(int k) { return f(k)+g(k); } +source_filename = "t.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +declare i32 @sink(i32) local_unnamed_addr + +define i32 @nodebug(i32 %k) local_unnamed_addr #2 { +entry: +; This should not 
set off the FnArg Verifier. The two variables are in different scopes. + #dbg_value(i32 %k, !12, !13, !14) + %call.k = tail call i32 @sink(i32 %k) #4, !dbg !15 + #dbg_value(i32 %k, !19, !13, !20) + %call.k3 = tail call i32 @sink(i32 %k) #4, !dbg !21 + %add = add nsw i32 %call.k3, %call.k + ret i32 %add +} + +; Function Attrs: nounwind readnone + +attributes #2 = { nounwind ssp uwtable } +attributes #3 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297153) (llvm/trunk 297155)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 5.0.0 (trunk 297153) (llvm/trunk 297155)"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12} +; CHECK: !DILocalVariable(name: "i", arg: 1 +!12 = !DILocalVariable(name: "i", arg: 1, scope: !7, file: !1, line: 2, type: !10) +!13 = !DIExpression() +!14 = !DILocation(line: 2, column: 42, scope: !7) +!15 = !DILocation(line: 2, column: 54, scope: !7) +!16 = !DILocation(line: 2, column: 47, scope: !7) +!17 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !18) +!18 = !{!19} +; CHECK: !DILocalVariable(name: "j", arg: 1 +!19 = !DILocalVariable(name: "j", arg: 1, scope: !17, file: !1, line: 3, type: !10) +!20 = !DILocation(line: 3, column: 42, 
scope: !17) +!21 = !DILocation(line: 3, column: 54, scope: !17) +!22 = !DILocation(line: 3, column: 47, scope: !17) diff --git a/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll b/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll new file mode 100644 index 00000000000000..f36cee5946e473 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as < %s -disable-output 2>&1 | FileCheck %s + +define void @foo(i32 %n) { +entry: + %0 = zext i32 %n to i64 + %vla = alloca i32, i64 %0, align 16 + #dbg_declare(ptr %vla, !19, !DIExpression(), !18) + ret void +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "vla.c", directory: "/path/to") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 5.0.1"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 20, type: !8, isLocal: false, isDefinition: true, scopeLine: 20, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!16, !19} +!12 = !DIExpression() +!16 = !DILocalVariable(name: "vla_expr", scope: !7, file: !1, line: 21, type: !17) +!17 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!18 = !DILocation(line: 21, column: 7, scope: !7) +!19 = !DILocalVariable(name: "vla", scope: !7, file: !1, line: 21, type: !20) +!20 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, align: 32, elements: !21) +!21 = !{!22} +; CHECK: Count must be signed constant or DIVariable or DIExpression +!22 = !DISubrange(count: !17) diff --git 
a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll new file mode 100644 index 00000000000000..9d400b892ce8c1 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record address/value +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !"" +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_declare(!"", !DILocalVariable(scope: !1), !DIExpression(), !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll new file mode 100644 index 00000000000000..b52c15cb3f8816 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record expression +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_declare(ptr %s, !DILocalVariable(scope: !1), !{}, !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll new file mode 100644 index 00000000000000..db2b0e0a54e2bd --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record variable +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s 
= alloca i32 + #dbg_declare(ptr %s, !{}, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll new file mode 100644 index 00000000000000..1839821ab14070 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll @@ -0,0 +1,55 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +define void @foo() { +entry: + #dbg_value( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !{}) +; CHECK-LABEL: invalid #dbg record DILocation +; CHECK-NEXT: #dbg_value({{.*}}) + + #dbg_declare( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !{}) +; CHECK-LABEL: invalid #dbg record DILocation +; CHECK-NEXT: #dbg_declare({{.*}}) + + #dbg_value( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !DILocation(scope: !2)) +; CHECK-LABEL: mismatched subprogram between #dbg record variable and DILocation +; CHECK-NEXT: #dbg_value({{[^,]+}}, ![[VAR:[0-9]+]], {{[^,]+}}, ![[LOC:[0-9]+]] +; CHECK-NEXT: label %entry +; CHECK-NEXT: ptr @foo +; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]] +; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram( +; CHECK-NEXT: ![[LOC]] = !DILocation({{.*}}scope: ![[LOCSP:[0-9]+]] +; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram( + + #dbg_declare( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !DILocation(scope: !2)) +; CHECK-LABEL: mismatched subprogram between #dbg record variable and DILocation +; CHECK-NEXT: #dbg_declare({{[^,]+}}, ![[VAR:[0-9]+]], {{.*[^,]+}}, ![[LOC:[0-9]+]] +; CHECK-NEXT: label %entry +; CHECK-NEXT: ptr @foo +; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]] +; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram( +; CHECK-NEXT: ![[LOC]] = 
!DILocation({{.*}}scope: ![[LOCSP:[0-9]+]] +; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram( + + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram(name: "foo") +!2 = distinct !DISubprogram(name: "bar") diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll new file mode 100644 index 00000000000000..cbd93c1ce6a4d6 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record expression +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(ptr %s, !DILocalVariable(scope: !1), !{}, !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll new file mode 100644 index 00000000000000..b6fcde250526be --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record address/value +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !"" +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(!"", !DILocalVariable(scope: !1), !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll new file mode 100644 index 00000000000000..0a5fe79453d721 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll @@ 
-0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record variable +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(ptr %s, !{}, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/set1.ll b/llvm/test/Verifier/RemoveDI/set1.ll new file mode 100644 index 00000000000000..d54ba8876c366c --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/set1.ll @@ -0,0 +1,62 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s + +define void @Main__Test() #0 !dbg !17 { +entry: + %as = alloca i64, align 8 + %bs = alloca i64, align 8 + br label %second, !dbg !21 + +second: ; preds = %entry + #dbg_declare(ptr %as, !22, !DIExpression(), !25) + #dbg_declare(ptr %bs, !26, !DIExpression(), !25) + store i64 36028797018972298, ptr %as, align 8, !dbg !28 + store i64 85, ptr %bs, align 8, !dbg !29 + ret void, !dbg !21 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + +!llvm.ident = !{!0} +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!14, !15, !16} + +!0 = !{!"versions- cm3: d5.10.0 llvm: 12.0"} +!1 = distinct !DICompileUnit(language: DW_LANG_Modula3, file: !2, producer: "cm3", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3) +!2 = !DIFile(filename: "Main.m3", directory: "/home/peter/cm3/settest/src") +!3 = !{!4} +!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", scope: !2, file: !2, line: 11, size: 8, align: 8, elements: !5) +!5 = !{!6, !7, !8, !9, !10, !11, !12, !13} +!6 = !DIEnumerator(name: "alpha", value: 0) +!7 = !DIEnumerator(name: "beta", value: 1) +!8 = !DIEnumerator(name: "gamma", value: 2) +!9 = !DIEnumerator(name: "delta", value: 3) +!10 = !DIEnumerator(name: "epsilon", value: 4) +!11 = 
!DIEnumerator(name: "theta", value: 5) +!12 = !DIEnumerator(name: "psi", value: 6) +!13 = !DIEnumerator(name: "zeta", value: 7) +!14 = !{i64 2, !"Dwarf Version", i64 4} +!15 = !{i64 2, !"Debug Info Version", i64 3} +!16 = !{i64 2, !"wchar_size", i64 2} +!17 = distinct !DISubprogram(name: "Test", linkageName: "Main__Test", scope: !2, file: !2, line: 11, type: !18, scopeLine: 11, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !20) +!18 = !DISubroutineType(types: !19) +!19 = !{null} +!20 = !{} +!21 = !DILocation(line: 20, scope: !17) +!22 = !DILocalVariable(name: "as", scope: !17, file: !2, line: 11, type: !23) +; CHECK: invalid set base type +!23 = !DIDerivedType(tag: DW_TAG_set_type, name: "SS", scope: !2, file: !2, line: 11, baseType: !24, size: 64, align: 64) +!24 = !DIBasicType(name: "SR", size: 8, encoding: DW_ATE_signed) +!25 = !DILocation(line: 11, scope: !17) +!26 = !DILocalVariable(name: "bs", scope: !17, file: !2, line: 11, type: !27) +!27 = !DIDerivedType(tag: DW_TAG_set_type, name: "ST", scope: !2, file: !2, line: 11, baseType: !23, size: 64, align: 64) +!28 = !DILocation(line: 17, scope: !17) +!29 = !DILocation(line: 18, scope: !17) +!30 = distinct !DISubprogram(name: "Main_M3", linkageName: "Main_M3", scope: !2, file: !2, line: 22, type: !31, scopeLine: 22, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !20) +!31 = !DISubroutineType(types: !32) +!32 = !{!33, !35} +!33 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "ADDR", baseType: !34, size: 64, align: 64) +!34 = !DICompositeType(tag: DW_TAG_class_type, name: "ADDR__HeapObject", scope: !2, file: !2, line: 22, size: 64, align: 64, elements: !19, identifier: "AJWxb1") +!35 = !DIBasicType(name: "INTEGER", size: 64, encoding: DW_ATE_signed) +!36 = !DILocation(line: 23, scope: !30) +!37 = !DILocalVariable(name: "mode", arg: 1, scope: !30, file: !2, line: 22, type: !35) +!38 = !DILocation(line: 22, scope: !30) From 4b70d17bcffaffd75a5d8c420396f8dc755b4652 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 7 Mar 2024 14:27:04 +0100 Subject: [PATCH 029/158] [clang-repl] Names declared in if conditions and for-init statements are local to the inner context (#84150) Make TopLevelStmtDecl a DeclContext so that variables defined in statements are attached to the TopLevelDeclContext. This fixes redefinition errors from variables declared in if conditions and for-init statements. These must be local to the inner context (C++ 3.3.2p4), but they had generated definitions on global scope instead. This PR makes the TopLevelStmtDecl looking more like a FunctionDecl and that's fine because the FunctionDecl is very close in terms of semantics. Additionally, ActOnForStmt() requires a CompoundScope when processing a NullStmt body. --------- Co-authored-by: Vassil Vassilev --- clang/include/clang/AST/Decl.h | 16 ++++++++++------ clang/include/clang/AST/DeclBase.h | 1 + clang/include/clang/Basic/DeclNodes.td | 2 +- clang/include/clang/Sema/Sema.h | 3 ++- clang/lib/AST/Decl.cpp | 11 ++++++++--- clang/lib/AST/DeclBase.cpp | 1 + clang/lib/Parse/ParseDecl.cpp | 24 ++++++++++++++++-------- clang/lib/Sema/SemaDecl.cpp | 16 +++++++++++++--- clang/test/Interpreter/execute-stmts.cpp | 24 +++++++++++++++++++++++- 9 files changed, 75 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 61117cc5ce71f9..a5879591f4c659 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4419,7 +4419,7 @@ class FileScopeAsmDecl : public Decl { /// /// \note This is used in libInterpreter, clang -cc1 -fincremental-extensions /// and in tools such as clang-repl. 
-class TopLevelStmtDecl : public Decl { +class TopLevelStmtDecl : public Decl, public DeclContext { friend class ASTDeclReader; friend class ASTDeclWriter; @@ -4427,7 +4427,7 @@ class TopLevelStmtDecl : public Decl { bool IsSemiMissing = false; TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S) - : Decl(TopLevelStmt, DC, L), Statement(S) {} + : Decl(TopLevelStmt, DC, L), DeclContext(TopLevelStmt), Statement(S) {} virtual void anchor(); @@ -4438,15 +4438,19 @@ class TopLevelStmtDecl : public Decl { SourceRange getSourceRange() const override LLVM_READONLY; Stmt *getStmt() { return Statement; } const Stmt *getStmt() const { return Statement; } - void setStmt(Stmt *S) { - assert(IsSemiMissing && "Operation supported for printing values only!"); - Statement = S; - } + void setStmt(Stmt *S); bool isSemiMissing() const { return IsSemiMissing; } void setSemiMissing(bool Missing = true) { IsSemiMissing = Missing; } static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == TopLevelStmt; } + + static DeclContext *castToDeclContext(const TopLevelStmtDecl *D) { + return static_cast(const_cast(D)); + } + static TopLevelStmtDecl *castFromDeclContext(const DeclContext *DC) { + return static_cast(const_cast(DC)); + } }; /// Represents a block literal declaration, which is like an diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 9a4736019d1b1b..76810a86a78a46 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -2120,6 +2120,7 @@ class DeclContext { case Decl::Block: case Decl::Captured: case Decl::ObjCMethod: + case Decl::TopLevelStmt: return true; default: return getDeclKind() >= Decl::firstFunction && diff --git a/clang/include/clang/Basic/DeclNodes.td b/clang/include/clang/Basic/DeclNodes.td index 8b1f415dd5fe2c..48396e85c5adac 100644 --- a/clang/include/clang/Basic/DeclNodes.td +++ b/clang/include/clang/Basic/DeclNodes.td @@ 
-95,7 +95,7 @@ def LinkageSpec : DeclNode, DeclContext; def Export : DeclNode, DeclContext; def ObjCPropertyImpl : DeclNode; def FileScopeAsm : DeclNode; -def TopLevelStmt : DeclNode; +def TopLevelStmt : DeclNode, DeclContext; def AccessSpec : DeclNode; def Friend : DeclNode; def FriendTemplate : DeclNode; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2d949f3fc9a718..592c7871a4a55d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3263,7 +3263,8 @@ class Sema final { Decl *ActOnFileScopeAsmDecl(Expr *expr, SourceLocation AsmLoc, SourceLocation RParenLoc); - Decl *ActOnTopLevelStmtDecl(Stmt *Statement); + TopLevelStmtDecl *ActOnStartTopLevelStmtDecl(Scope *S); + void ActOnFinishTopLevelStmtDecl(TopLevelStmtDecl *D, Stmt *Statement); void ActOnPopScope(SourceLocation Loc, Scope *S); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 59c039f1f8daeb..d681791d3920c3 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -5552,14 +5552,13 @@ FileScopeAsmDecl *FileScopeAsmDecl::CreateDeserialized(ASTContext &C, void TopLevelStmtDecl::anchor() {} TopLevelStmtDecl *TopLevelStmtDecl::Create(ASTContext &C, Stmt *Statement) { - assert(Statement); assert(C.getLangOpts().IncrementalExtensions && "Must be used only in incremental mode"); - SourceLocation BeginLoc = Statement->getBeginLoc(); + SourceLocation Loc = Statement ? 
Statement->getBeginLoc() : SourceLocation(); DeclContext *DC = C.getTranslationUnitDecl(); - return new (C, DC) TopLevelStmtDecl(DC, BeginLoc, Statement); + return new (C, DC) TopLevelStmtDecl(DC, Loc, Statement); } TopLevelStmtDecl *TopLevelStmtDecl::CreateDeserialized(ASTContext &C, @@ -5572,6 +5571,12 @@ SourceRange TopLevelStmtDecl::getSourceRange() const { return SourceRange(getLocation(), Statement->getEndLoc()); } +void TopLevelStmtDecl::setStmt(Stmt *S) { + assert(S); + Statement = S; + setLocation(Statement->getBeginLoc()); +} + void EmptyDecl::anchor() {} EmptyDecl *EmptyDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 10fe8bb97ce660..fcedb3cfd176a0 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1352,6 +1352,7 @@ DeclContext *DeclContext::getPrimaryContext() { case Decl::ExternCContext: case Decl::LinkageSpec: case Decl::Export: + case Decl::TopLevelStmt: case Decl::Block: case Decl::Captured: case Decl::OMPDeclareReduction: diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 81f1c711269445..64b234eb460d24 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -5678,24 +5678,32 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() { // Parse a top-level-stmt. 
Parser::StmtVector Stmts; ParsedStmtContext SubStmtCtx = ParsedStmtContext(); - Actions.PushFunctionScope(); + ParseScope FnScope(this, Scope::FnScope | Scope::DeclScope | + Scope::CompoundStmtScope); + TopLevelStmtDecl *TLSD = Actions.ActOnStartTopLevelStmtDecl(getCurScope()); StmtResult R = ParseStatementOrDeclaration(Stmts, SubStmtCtx); - Actions.PopFunctionScopeInfo(); if (!R.isUsable()) return nullptr; - SmallVector DeclsInGroup; - DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get())); + Actions.ActOnFinishTopLevelStmtDecl(TLSD, R.get()); if (Tok.is(tok::annot_repl_input_end) && Tok.getAnnotationValue() != nullptr) { ConsumeAnnotationToken(); - cast(DeclsInGroup.back())->setSemiMissing(); + TLSD->setSemiMissing(); } - // Currently happens for things like -fms-extensions and use `__if_exists`. - for (Stmt *S : Stmts) - DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S)); + SmallVector DeclsInGroup; + DeclsInGroup.push_back(TLSD); + + // Currently happens for things like -fms-extensions and use `__if_exists`. + for (Stmt *S : Stmts) { + // Here we should be safe as `__if_exists` and friends are not introducing + // new variables which need to live outside file scope. 
+ TopLevelStmtDecl *D = Actions.ActOnStartTopLevelStmtDecl(getCurScope()); + Actions.ActOnFinishTopLevelStmtDecl(D, S); + DeclsInGroup.push_back(D); + } return Actions.BuildDeclaratorGroup(DeclsInGroup); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6b81ee183cc440..67e56a917a51de 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20519,12 +20519,22 @@ Decl *Sema::ActOnFileScopeAsmDecl(Expr *expr, return New; } -Decl *Sema::ActOnTopLevelStmtDecl(Stmt *Statement) { - auto *New = TopLevelStmtDecl::Create(Context, Statement); - Context.getTranslationUnitDecl()->addDecl(New); +TopLevelStmtDecl *Sema::ActOnStartTopLevelStmtDecl(Scope *S) { + auto *New = TopLevelStmtDecl::Create(Context, /*Statement=*/nullptr); + CurContext->addDecl(New); + PushDeclContext(S, New); + PushFunctionScope(); + PushCompoundScope(false); return New; } +void Sema::ActOnFinishTopLevelStmtDecl(TopLevelStmtDecl *D, Stmt *Statement) { + D->setStmt(Statement); + PopCompoundScope(); + PopFunctionScopeInfo(); + PopDeclContext(); +} + void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name, IdentifierInfo* AliasName, SourceLocation PragmaLoc, diff --git a/clang/test/Interpreter/execute-stmts.cpp b/clang/test/Interpreter/execute-stmts.cpp index 2d4c17e0c91e66..433c6811777dac 100644 --- a/clang/test/Interpreter/execute-stmts.cpp +++ b/clang/test/Interpreter/execute-stmts.cpp @@ -9,7 +9,6 @@ //CODEGEN-CHECK-COUNT-2: define internal void @__stmts__ //CODEGEN-CHECK-NOT: define internal void @__stmts__ - extern "C" int printf(const char*,...); template T call() { printf("called\n"); return T(); } @@ -41,3 +40,26 @@ for (; i > 4; --i) { printf("i = %d\n", i); }; int j = i; printf("j = %d\n", j); // CHECK-NEXT: j = 4 + +{i = 0; printf("i = %d (global scope)\n", i);} +// CHECK-NEXT: i = 0 + +while (int i = 1) { printf("i = %d (while condition)\n", i--); break; } +// CHECK-NEXT: i = 1 + +if (int i = 2) printf("i = %d (if condition)\n", i); +// 
CHECK-NEXT: i = 2 + +switch (int i = 3) { default: printf("i = %d (switch condition)\n", i); } +// CHECK-NEXT: i = 3 + +for (int i = 4; i > 3; --i) printf("i = %d (for-init)\n", i); +// CHECK-NEXT: i = 4 + +for (const auto &i : "5") printf("i = %c (range-based for-init)\n", i); +// CHECK-NEXT: i = 5 + +int *aa=nullptr; +if (auto *b=aa) *b += 1; +while (auto *b=aa) ; +for (auto *b=aa; b; *b+=1) ; From 2acccf6717996bea8ade96dafdfc3343e9604694 Mon Sep 17 00:00:00 2001 From: aniplcc <157880614+aniplcc@users.noreply.github.com> Date: Thu, 7 Mar 2024 19:09:04 +0530 Subject: [PATCH 030/158] [Clang] Update value for __cpp_implicit_move (#84216) (#84228) Fixes #84216 --- clang/lib/Frontend/InitPreprocessor.cpp | 2 +- clang/test/Lexer/cxx-features.cpp | 2 +- clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 9b979d810fa127..48ad92063bd461 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -736,7 +736,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, } // C++23 features. 
if (LangOpts.CPlusPlus23) { - Builder.defineMacro("__cpp_implicit_move", "202011L"); + Builder.defineMacro("__cpp_implicit_move", "202207L"); Builder.defineMacro("__cpp_size_t_suffix", "202011L"); Builder.defineMacro("__cpp_if_consteval", "202106L"); Builder.defineMacro("__cpp_multidimensional_subscript", "202211L"); diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 2650a3a82252ba..9496746c6fd663 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -45,7 +45,7 @@ #endif -#if check(implicit_move, 0, 0, 0, 0, 0, 202011, 202011) +#if check(implicit_move, 0, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_implicit_move" #endif diff --git a/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp b/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp index d40491834d3988..9323dea24bd75b 100644 --- a/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp +++ b/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp @@ -9,7 +9,7 @@ #if __INCLUDE_LEVEL__ == 0 -#if __cpluscplus > 202002L && __cpp_implicit_move < 202011L +#if __cpluscplus > 202002L && __cpp_implicit_move < 202207L #error "__cpp_implicit_move not defined correctly" #endif From c669c0383cf982bec279f567662cc918576b6f34 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 7 Mar 2024 13:39:37 +0000 Subject: [PATCH 031/158] [TTI] SK_ExtractSubvector - Ensure we use the src / subvector types in the correct order Fixes typo in #84156, fixes buildbot assertion (most targets don't seem to care so tricky to create a testcase). 
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 095c2ff1e58bdb..7f661bb4a1df20 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1382,9 +1382,9 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { SmallVector ExtractMask(Mask.size()); std::iota(ExtractMask.begin(), ExtractMask.end(), 0); - return ShuffleCost + TargetTTI->getShuffleCost( - TTI::SK_ExtractSubvector, VecTy, ExtractMask, - CostKind, 0, VecSrcTy, Operands); + return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, + VecSrcTy, ExtractMask, + CostKind, 0, VecTy); } if (Shuffle->isIdentity()) From 48dd118f56e007a173b30019e860f0bd373a8ff8 Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Thu, 7 Mar 2024 14:40:30 +0100 Subject: [PATCH 032/158] [Clang] Fix approved revision of P2266 --- clang/www/cxx_status.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 5ed27cdd43b368..fa00e7685610a6 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -197,7 +197,7 @@

C++23 implementation status

Simpler implicit move - P2266R1 + P2266R3 Clang 13 From 597be90f8b72fde59505f3650c20cf9e57b47d57 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 07:40:38 -0600 Subject: [PATCH 033/158] [Clang][NFC] Remove '--' separator in the linker wrapper usage (#84253) Summary: The very first version of the `clang-linker-wrapper` used `--` as a separator for the host and device arguments. I moved away from this towards a commandline parsing implementation years ago but never got around to officially removing this. --- clang/lib/Driver/ToolChains/Clang.cpp | 1 - clang/test/Driver/amdgpu-openmp-toolchain.c | 2 +- clang/test/Driver/linker-wrapper-image.c | 18 ++++----- clang/test/Driver/linker-wrapper-libs.c | 14 +++---- clang/test/Driver/linker-wrapper.c | 40 +++++++++---------- clang/test/Driver/openmp-offload-gpu.c | 2 +- clang/test/Driver/openmp-offload-infer.c | 2 +- .../clang-linker-wrapper/LinkerWrapperOpts.td | 4 -- 8 files changed, 39 insertions(+), 44 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index deb2dac80afe7c..e63e8a8e2e0e4b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8892,7 +8892,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, // Add the linker arguments to be forwarded by the wrapper. 
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") + LinkCommand->getExecutable())); - CmdArgs.push_back("--"); for (const char *LinkArg : LinkCommand->getArguments()) CmdArgs.push_back(LinkArg); diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 4975e2f8a52399..849afb871ddbfc 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -11,7 +11,7 @@ // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" // CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"{{.*}}"-fcuda-is-device"{{.*}} // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index 08f860f6cab0de..75475264135224 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -8,11 +8,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefixes=OPENMP-ELF,OPENMP-REL +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF // OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] @@ -45,11 +45,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF // CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] @@ -145,11 +145,11 @@ // RUN: %clang 
-cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF // HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 2073092bdbcf9e..9a78200d7d3cfc 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -43,7 +43,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img 
--target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -65,7 +65,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -88,7 +88,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE // LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -109,7 +109,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WEAK // LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 @@ -131,7 +131,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple 
x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-HIDDEN // LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 @@ -154,7 +154,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED // LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -178,7 +178,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o --whole-archive %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 83df2b84adefed..c37f01189d0870 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -16,10 +16,10 @@ __attribute__((visibility("protected"), used)) int x; // RUN: 
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK // NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o @@ -28,7 +28,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG // NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g @@ -37,7 +37,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 
2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK // AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -46,7 +46,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS // AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps @@ -56,14 +56,14 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: llvm-ar rcs %t.a %t.o // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- --whole-archive %t.a --no-whole-archive \ +// RUN: --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK // CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \ -// RUN: --linker-path=/usr/bin/ld.lld -- -a -b -c %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-LINK +// RUN: 
--linker-path=/usr/bin/ld.lld -a -b -c %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-LINK // HOST-LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out // HOST-LINK-NOT: ld.lld{{.*}}-abc @@ -77,7 +77,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-obj.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY +// RUN: --linker-path=/usr/bin/ld %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY // STATIC-LIBRARY: clang{{.*}} -march=sm_70 // STATIC-LIBRARY-NOT: clang{{.*}} -march=sm_50 @@ -89,7 +89,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA // CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52 // CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70 @@ -104,7 +104,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR // CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin @@ -115,7 +115,7 @@ 
__attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908 // HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a @@ -127,14 +127,14 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LINKER-ARGS // LINKER-ARGS: clang{{.*}}--target=amdgcn-amd-amdhsa{{.*}}a // LINKER-ARGS: clang{{.*}}--target=nvptx64-nvidia-cuda{{.*}}a b // RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \ -// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b \ // RUN: -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY // MISSING-LIBRARY: error: unable to find library -ldummy @@ -144,7 +144,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--clang-backend \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND // CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.bc @@ -152,7 +152,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-windows-msvc -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-windows-msvc --dry-run \ -// RUN: --linker-path=/usr/bin/lld-link -- %t.o -libpath:./ -out:a.exe 2>&1 | FileCheck %s --check-prefix=COFF +// RUN: --linker-path=/usr/bin/lld-link %t.o -libpath:./ -out:a.exe 2>&1 | FileCheck %s --check-prefix=COFF // COFF: "/usr/bin/lld-link" {{.*}}.o -libpath:./ -out:a.exe {{.*}}openmp.image.wrapper{{.*}} @@ -167,7 +167,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack- // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID +// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -183,7 +183,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // 
RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL +// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -193,7 +193,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK // RELOCATABLE-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu @@ -205,7 +205,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-HIP // RELOCATABLE-LINK-HIP: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa @@ -218,7 +218,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89 // RUN: %clang -cc1 %s -triple 
x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA // RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index f7b06c9ec59580..d705be44e595d8 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -233,7 +233,7 @@ // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" // CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c index 9a949f52e2e97d..50333293eb7dbd 100644 --- a/clang/test/Driver/openmp-offload-infer.c +++ b/clang/test/Driver/openmp-offload-infer.c @@ -13,7 +13,7 @@ // CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx803" // CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -### 
--target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ // RUN: --offload-arch=sm_70 --offload-arch=gfx908:sramecc+:xnack- \ diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 763426570c2a6f..2c6a788cf23a38 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -84,10 +84,6 @@ def linker_arg_EQ : Joined<["--"], "linker-arg=">, Flags<[DeviceOnlyOption, HelpHidden]>, HelpText<"An extra argument to be passed to the linker">; -// Separator between the linker wrapper and host linker flags. -def separator : Flag<["--"], "">, Flags<[WrapperOnlyOption]>, - HelpText<"The separator for the wrapped linker arguments">; - // Arguments for the LLVM backend. def mllvm : Separate<["-"], "mllvm">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Arguments passed to the LLVM invocation">; From 4cfd4a7896b5fd50274ec8573c259d7ad41741de Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 7 Mar 2024 13:53:02 +0000 Subject: [PATCH 034/158] [LAA] Add test case for #82665. Test case for https://github.com/llvm/llvm-project/issues/82665. 
--- .../underlying-object-loop-varying-phi.ll | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll new file mode 100644 index 00000000000000..1a5a6ac08d4045 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Test case for https://github.com/llvm/llvm-project/issues/82665. +define void @indirect_ptr_recurrences_read_write(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4, !tbaa !10 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define i32 @indirect_ptr_recurrences_read_only_loop(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_only_loop' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %xor, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %xor +} + +define void @indirect_ptr_recurrences_read_write_may_alias_no_tbaa(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_no_tbaa' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant 
address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_alias_different_obj(ptr %A, ptr %B, ptr %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_noalias_different_obj(ptr %A, ptr %B, ptr noalias %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_noalias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + + +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"int", !8, i64 0} From 61b13e0dfe1b476d9bf0fe477983be8471cfd26b Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 7 Mar 2024 09:26:16 -0500 Subject: [PATCH 035/158] [ClangOffloadBundler] fix unbundling archive (#84195) When unbundling an archive, need to save the content of each object file to a temporary file before passing it to llvm-objcopy, instead of passing the original input archive file to llvm-objcopy. Also allows extracting host bundles for archives. Fixes: https://github.com/llvm/llvm-project/issues/83509 --- clang/lib/Driver/OffloadBundler.cpp | 30 ++++++++++++++++----- clang/test/Driver/clang-offload-bundler.c | 32 +++++++++++++++++++++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index 99a34d25cfcd56..f9eadfaec88dec 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -590,7 +590,8 @@ class ObjectFileHandler final : public FileHandler { // Copy fat object contents to the output when extracting host bundle. 
std::string ModifiedContent; if (Content.size() == 1u && Content.front() == 0) { - auto HostBundleOrErr = getHostBundle(); + auto HostBundleOrErr = getHostBundle( + StringRef(Input.getBufferStart(), Input.getBufferSize())); if (!HostBundleOrErr) return HostBundleOrErr.takeError(); @@ -700,7 +701,7 @@ class ObjectFileHandler final : public FileHandler { return Error::success(); } - Expected getHostBundle() { + Expected getHostBundle(StringRef Input) { TempFileHandlerRAII TempFiles; auto ModifiedObjPathOrErr = TempFiles.Create(std::nullopt); @@ -715,7 +716,24 @@ class ObjectFileHandler final : public FileHandler { ObjcopyArgs.push_back("--regex"); ObjcopyArgs.push_back("--remove-section=__CLANG_OFFLOAD_BUNDLE__.*"); ObjcopyArgs.push_back("--"); - ObjcopyArgs.push_back(BundlerConfig.InputFileNames.front()); + + StringRef ObjcopyInputFileName; + // When unbundling an archive, the content of each object file in the + // archive is passed to this function by parameter Input, which is different + // from the content of the original input archive file, therefore it needs + // to be saved to a temporary file before passed to llvm-objcopy. Otherwise, + // Input is the same as the content of the original input file, therefore + // temporary file is not needed. 
+ if (StringRef(BundlerConfig.FilesType).starts_with("a")) { + auto InputFileOrErr = + TempFiles.Create(ArrayRef(Input.data(), Input.size())); + if (!InputFileOrErr) + return InputFileOrErr.takeError(); + ObjcopyInputFileName = *InputFileOrErr; + } else + ObjcopyInputFileName = BundlerConfig.InputFileNames.front(); + + ObjcopyArgs.push_back(ObjcopyInputFileName); ObjcopyArgs.push_back(ModifiedObjPath); if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs)) @@ -1628,10 +1646,8 @@ Error OffloadBundler::UnbundleArchive() { while (!CodeObject.empty()) { SmallVector CompatibleTargets; auto CodeObjectInfo = OffloadTargetInfo(CodeObject, BundlerConfig); - if (CodeObjectInfo.hasHostKind()) { - // Do nothing, we don't extract host code yet. - } else if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets, - BundlerConfig)) { + if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets, + BundlerConfig)) { std::string BundleData; raw_string_ostream DataStream(BundleData); if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer)) diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index 9d8b81ee9806ee..f3cd2493e05277 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -13,6 +13,19 @@ // RUN: obj2yaml %t.o > %t.o.yaml // RUN: %clang -O0 -target %itanium_abi_triple %s -emit-ast -o %t.ast +// RUN: echo 'void a() {}' >%t.a.cpp +// RUN: echo 'void b() {}' >%t.b.cpp +// RUN: %clang -target %itanium_abi_triple %t.a.cpp -c -o %t.a.o +// RUN: %clang -target %itanium_abi_triple %t.b.cpp -c -o %t.b.o +// +// Remove .llvm_addrsig section since its offset changes after llvm-objcopy +// removes clang-offload-bundler sections, therefore not good for comparison. 
+// +// RUN: llvm-objcopy --remove-section=.llvm_addrsig %t.a.o +// RUN: llvm-objcopy --remove-section=.llvm_addrsig %t.b.o +// RUN: obj2yaml %t.a.o > %t.a.yaml +// RUN: obj2yaml %t.b.o > %t.b.yaml + // // Generate an empty file to help with the checks of empty files. // @@ -414,6 +427,25 @@ // HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 // HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 +// +// Check unbundling archive for host target +// +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900 \ +// RUN: -input=%t.a.o -input=%t.tgt1 -output=%t.a.bundled.o +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900 \ +// RUN: -input=%t.b.o -input=%t.tgt1 -output=%t.b.bundled.o +// RUN: rm -f %t.bundled.a +// RUN: llvm-ar cr %t.bundled.a %t.a.bundled.o %t.b.bundled.o +// RUN: cp %t.bundled.a %t.bundled.a.bak +// RUN: clang-offload-bundler -unbundle --targets=host-%itanium_abi_triple -type=a -input=%t.bundled.a -output=%t.host.a +// RUN: rm -f *%itanium_abi_triple*.a.bundled.o *%itanium_abi_triple*.b.bundled.o +// RUN: llvm-ar -x %t.host.a +// RUN: diff %t.bundled.a %t.bundled.a.bak +// RUN: obj2yaml *%itanium_abi_triple*.a.bundled.o > %t.a.unbundled.yaml +// RUN: diff %t.a.unbundled.yaml %t.a.yaml +// RUN: obj2yaml *%itanium_abi_triple*.b.bundled.o > %t.b.unbundled.yaml +// RUN: diff %t.b.unbundled.yaml %t.b.yaml +// // Check clang-offload-bundler reporting an error when trying to unbundle an archive but // the input file is not an archive. // From e4d4cfa5a0111372dff2b01126545cf3139ee40b Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 7 Mar 2024 09:32:43 -0500 Subject: [PATCH 036/158] [libc++] Fixes time formatter test output for Linux on PowerPC (#75526) Fix output to match actual. 
--- .../test/std/time/time.syn/formatter.file_time.pass.cpp | 8 ++++++-- .../test/std/time/time.syn/formatter.local_time.pass.cpp | 8 ++++++-- libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp | 8 ++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp index aec6e78d994da9..b07282593d759c 100644 --- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp @@ -821,7 +821,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -839,7 +841,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 
2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), diff --git a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp index 71dd7fba9fb701..45c3a12ea35cb2 100644 --- a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp @@ -820,7 +820,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -838,7 +840,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 
2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp index ebc45c7e87351f..2fed270cbade72 100644 --- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp @@ -817,7 +817,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -835,7 +837,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), From f355cd6f6c51580316e1e88ef5534bd2f8cfa498 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Thu, 7 Mar 2024 15:48:11 +0100 Subject: [PATCH 037/158] [mlir][EmitC] Allow further ops within expressions (#84284) This adds the `CExpression` trait to additional ops to allow to use these ops within the expression operation. 
Furthermore, the operator precedence is defined for those ops. --- mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 22 +++++++++-------- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 26 ++++++++++++++------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 02ab73fa2ca56b..db0e2d10960d72 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -106,7 +106,7 @@ def EmitC_ApplyOp : EmitC_Op<"apply", [CExpression]> { let hasVerifier = 1; } -def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", []> { +def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", [CExpression]> { let summary = "Bitwise and operation"; let description = [{ With the `bitwise_and` operation the bitwise operator & (and) can @@ -124,7 +124,8 @@ def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", []> { }]; } -def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", []> { +def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", + [CExpression]> { let summary = "Bitwise left shift operation"; let description = [{ With the `bitwise_left_shift` operation the bitwise operator << @@ -142,7 +143,7 @@ def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", []> { }]; } -def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", []> { +def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", [CExpression]> { let summary = "Bitwise not operation"; let description = [{ With the `bitwise_not` operation the bitwise operator ~ (not) can @@ -160,7 +161,7 @@ def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", []> { }]; } -def EmitC_BitwiseOrOp : EmitC_BinaryOp<"bitwise_or", []> { +def EmitC_BitwiseOrOp : EmitC_BinaryOp<"bitwise_or", [CExpression]> { let summary = "Bitwise or operation"; let description = [{ With the `bitwise_or` operation the bitwise operator | (or) @@ -178,7 +179,8 @@ def EmitC_BitwiseOrOp : 
EmitC_BinaryOp<"bitwise_or", []> { }]; } -def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", []> { +def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", + [CExpression]> { let summary = "Bitwise right shift operation"; let description = [{ With the `bitwise_right_shift` operation the bitwise operator >> @@ -196,7 +198,7 @@ def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", []> { }]; } -def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", []> { +def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", [CExpression]> { let summary = "Bitwise xor operation"; let description = [{ With the `bitwise_xor` operation the bitwise operator ^ (xor) @@ -515,7 +517,7 @@ def EmitC_ForOp : EmitC_Op<"for", } def EmitC_CallOp : EmitC_Op<"call", - [CallOpInterface, + [CallOpInterface, CExpression, DeclareOpInterfaceMethods]> { let summary = "call operation"; let description = [{ @@ -771,7 +773,7 @@ def EmitC_LiteralOp : EmitC_Op<"literal", [Pure]> { let assemblyFormat = "$value attr-dict `:` type($result)"; } -def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", []> { +def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", [CExpression]> { let summary = "Logical and operation"; let description = [{ With the `logical_and` operation the logical operator && (and) can @@ -792,7 +794,7 @@ def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", []> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", []> { +def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", [CExpression]> { let summary = "Logical not operation"; let description = [{ With the `logical_not` operation the logical operator ! 
(negation) can @@ -813,7 +815,7 @@ def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", []> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", []> { +def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", [CExpression]> { let summary = "Logical or operation"; let description = [{ With the `logical_or` operation the logical operator || (inclusive or) diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 4bc707c43ad92f..95513cb0fb2ebc 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -71,9 +71,17 @@ inline LogicalResult interleaveCommaWithError(const Container &c, /// imply higher precedence. static FailureOr getOperatorPrecedence(Operation *operation) { return llvm::TypeSwitch>(operation) - .Case([&](auto op) { return 11; }) - .Case([&](auto op) { return 13; }) - .Case([&](auto op) { return 13; }) + .Case([&](auto op) { return 12; }) + .Case([&](auto op) { return 15; }) + .Case([&](auto op) { return 7; }) + .Case([&](auto op) { return 11; }) + .Case([&](auto op) { return 15; }) + .Case([&](auto op) { return 5; }) + .Case([&](auto op) { return 11; }) + .Case([&](auto op) { return 6; }) + .Case([&](auto op) { return 16; }) + .Case([&](auto op) { return 16; }) + .Case([&](auto op) { return 15; }) .Case([&](auto op) -> FailureOr { switch (op.getPredicate()) { case emitc::CmpPredicate::eq: @@ -89,11 +97,13 @@ static FailureOr getOperatorPrecedence(Operation *operation) { } return op->emitError("unsupported cmp predicate"); }) - .Case([&](auto op) { return 12; }) - .Case([&](auto op) { return 12; }) - .Case([&](auto op) { return 12; }) - .Case([&](auto op) { return 11; }) - .Case([&](auto op) { return 14; }) + .Case([&](auto op) { return 13; }) + .Case([&](auto op) { return 4; }) + .Case([&](auto op) { return 15; }) + .Case([&](auto op) { return 3; }) + .Case([&](auto op) { return 13; }) + .Case([&](auto op) { 
return 13; }) + .Case([&](auto op) { return 12; }) .Default([](auto op) { return op->emitError("unsupported operation"); }); } From 043a020688765ad1ed27df718f908cfd0dc353a3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 08:47:57 -0600 Subject: [PATCH 038/158] [libc] Fix missing standard definitions in the GPU config Summary: Some dependencies on the standard C extensions are added transitively. This patch adds the new values. --- libc/config/gpu/api.td | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index dbd212be56a3f1..607b8b6d5900c8 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -4,6 +4,7 @@ include "spec/stdc.td" include "spec/posix.td" include "spec/gpu_ext.td" include "spec/gnu_ext.td" +include "spec/stdc_ext.td" include "spec/llvm_libc_ext.td" def AssertMacro : MacroDef<"assert"> { From 2b8aaef09e2fd0b2a5581e198a73579c6939c717 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 7 Mar 2024 09:50:29 -0500 Subject: [PATCH 039/158] [GISEL] Add IRTranslation for shufflevector on scalable vector types (#80378) This patch is stacked on https://github.com/llvm/llvm-project/pull/80372, https://github.com/llvm/llvm-project/pull/80307, and https://github.com/llvm/llvm-project/pull/80306. ShuffleVector on scalable vector types gets IRTranslate'd to G_SPLAT_VECTOR since a ShuffleVector that has operates on scalable vectors is a splat vector where the value of the splat vector is the 0th element of the first operand, because the index mask operand is the zeroinitializer (undef and poison are treated as zeroinitializer here). This is analogous to what happens in SelectionDAG for ShuffleVector. `buildSplatVector` is renamed to`buildBuildVectorSplatVector`. I did not make this a separate patch because it would cause problems to revert that change without reverting this change too. 
--- llvm/docs/GlobalISel/GenericOpcode.rst | 5 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 12 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 7 + llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 4 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 27 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 16 +- llvm/lib/CodeGen/MachineVerifier.cpp | 18 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- .../GlobalISel/legalizer-info-validation.mir | 3 + .../GlobalISel/irtranslator/shufflevector.ll | 1774 +++++++++++++++++ .../MachineVerifier/test_g_splat_vector.mir | 27 + .../GlobalISel/LegalizerHelperTest.cpp | 4 +- .../CodeGen/GlobalISel/PatternMatchTest.cpp | 6 +- 15 files changed, 1890 insertions(+), 21 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll create mode 100644 llvm/test/MachineVerifier/test_g_splat_vector.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 33b0152bd7b49c..dda367607d0432 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -639,6 +639,11 @@ Concatenate two vectors and shuffle the elements according to the mask operand. The mask operand should be an IR Constant which exactly matches the corresponding mask for the IR shufflevector instruction. +G_SPLAT_VECTOR +^^^^^^^^^^^^^^^^ + +Create a vector where all elements are the scalar from the source operand. 
+ Vector Reduction Operations --------------------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 1387a0a37561c4..6762b1b360d5e8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1063,8 +1063,7 @@ class MachineIRBuilder { /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements - MachineInstrBuilder buildSplatVector(const DstOp &Res, - const SrcOp &Src); + MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src); /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ... /// @@ -1099,6 +1098,15 @@ class MachineIRBuilder { MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef Mask); + /// Build and insert \p Res = G_SPLAT_VECTOR \p Val + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with vector type. + /// \pre \p Val must be a generic virtual register with scalar type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val); + /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ... /// /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 6aded2ceebe13a..94fba491148b2e 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -736,6 +736,9 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) /// Generic shufflevector. HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) +/// Generic splatvector. +HANDLE_TARGET_OPCODE(G_SPLAT_VECTOR) + /// Generic count trailing zeroes. 
HANDLE_TARGET_OPCODE(G_CTTZ) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index d2036e478d18f2..d967885aa2d758 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1450,6 +1450,13 @@ def G_SHUFFLE_VECTOR: GenericInstruction { let hasSideEffects = false; } +// Generic splatvector. +def G_SPLAT_VECTOR: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$val); + let hasSideEffects = false; +} + //------------------------------------------------------------------------------ // Vector reductions //------------------------------------------------------------------------------ diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 64e2d517e3b9c4..1869e0d41a51f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -309,7 +309,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, // For vectors, CSE the element only for now. LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); @@ -336,7 +336,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, // For vectors, CSE the element only for now. 
LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7c986dbbc2c7c8..365870f540daeb 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1598,10 +1598,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // We might need to splat the base pointer into a vector if the offsets // are vectors. if (WantSplatVector && !PtrTy.isVector()) { - BaseReg = - MIRBuilder - .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) - .getReg(0); + BaseReg = MIRBuilder + .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy), + BaseReg) + .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); OffsetIRTy = DL->getIndexType(PtrIRTy); @@ -1639,8 +1639,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { if (!IdxTy.isVector() && WantSplatVector) { - IdxReg = MIRBuilder.buildSplatVector( - OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); + IdxReg = MIRBuilder + .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy), + IdxReg) + .getReg(0); } IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); @@ -2997,6 +2999,19 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + // A ShuffleVector that has operates on scalable vectors is a splat vector + // where the value of the splat vector is the 0th element of the first + // operand, since the index mask operand is the zeroinitializer (undef and + // poison are treated as zeroinitializer here). 
+ if (U.getOperand(0)->getType()->isScalableTy()) { + Value *Op0 = U.getOperand(0); + auto SplatVal = MIRBuilder.buildExtractVectorElementConstant( + LLT::scalar(Op0->getType()->getScalarSizeInBits()), + getOrCreateVReg(*Op0), 0); + MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal); + return true; + } + ArrayRef Mask; if (auto *SVI = dyn_cast(&U)) Mask = SVI->getShuffleMask(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d016e684c48f6..2ec47f72aca39a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8391,7 +8391,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { // For vector types create a G_BUILD_VECTOR. if (Ty.isVector()) - Val = MIB.buildSplatVector(Ty, Val).getReg(0); + Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0); return Val; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index cdd605a5221ad8..a5a136e2effc60 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -326,7 +326,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, auto Const = buildInstr(TargetOpcode::G_CONSTANT) .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addCImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_CONSTANT); @@ -363,7 +363,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addFPImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); @@ -711,8 +711,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } 
-MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, - const SrcOp &Src) { +MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res, + const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } @@ -742,6 +742,14 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); } +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { + LLT DstTy = Res.getLLTTy(*getMRI()); + assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + "Expected Src to match Dst elt ty"); + return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); +} + MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1d0757c5d7f5f5..ecb3bd33bdfd49 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1640,6 +1640,24 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + + case TargetOpcode::G_SPLAT_VECTOR: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + + if (!DstTy.isScalableVector()) + report("Destination type must be a scalable vector", MI); + + if (!SrcTy.isScalar()) + report("Source type must be a scalar", MI); + + if (DstTy.getScalarType() != SrcTy) + report("Element type of the destination must be the same type as the " + "source type", + MI); + + break; + } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 750d70c03eabd7..4713bd605c243b 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -20920,7 +20920,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { unsigned Op = Inst.getOpcode(); if (Op == Instruction::Add || Op == Instruction::Sub || Op == Instruction::And || Op == Instruction::Or || - Op == Instruction::Xor || Op == Instruction::InsertElement) + Op == Instruction::Xor || Op == Instruction::InsertElement || + Op == Instruction::Xor || Op == Instruction::ShuffleVector) return false; if (Inst.getType()->isScalableTy()) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d87704cf45d5d5..7774158e15ec58 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -625,6 +625,9 @@ # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_SPLAT_VECTOR (opcode 217): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll new file mode 100644 index 00000000000000..df7778899b0d09 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll @@ -0,0 +1,1774 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s + +define @shufflevector_nxv1i1_0() { + ; RV32-LABEL: name: shufflevector_nxv1i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i1_1() { + ; RV32-LABEL: name: shufflevector_nxv1i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT 
[[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i1_0() { + ; RV32-LABEL: name: shufflevector_nxv2i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), 
[[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i1_1() { + ; RV32-LABEL: name: shufflevector_nxv2i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i1_0() { + ; RV32-LABEL: name: shufflevector_nxv4i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i1_1() { + ; RV32-LABEL: name: shufflevector_nxv4i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY 
[[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i1_0() { + ; RV32-LABEL: name: shufflevector_nxv8i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY 
[[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i1_1() { + ; RV32-LABEL: name: shufflevector_nxv8i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; 
RV64-LABEL: name: shufflevector_nxv8i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i1_0() { + ; RV32-LABEL: name: shufflevector_nxv16i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i1_1() { + ; RV32-LABEL: name: shufflevector_nxv16i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_1 + ; RV64: bb.1 
(%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i8_0() { + ; RV32-LABEL: name: shufflevector_nxv1i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_0 + ; RV64: bb.1 (%ir-block.0): 
+ ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i8_1() { + ; RV32-LABEL: name: shufflevector_nxv1i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i8_0() { + ; RV32-LABEL: name: shufflevector_nxv2i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i8_1() { + ; RV32-LABEL: name: shufflevector_nxv2i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i8_0() { + ; RV32-LABEL: name: shufflevector_nxv4i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i8_1() { + ; RV32-LABEL: name: shufflevector_nxv4i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = 
G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i8_0() { + ; RV32-LABEL: name: shufflevector_nxv8i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i8_1() { + ; RV32-LABEL: name: shufflevector_nxv8i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i8_0() { + ; RV32-LABEL: name: shufflevector_nxv16i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i8_1() { + ; RV32-LABEL: name: shufflevector_nxv16i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i16_0() { + ; RV32-LABEL: name: shufflevector_nxv1i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i16_1() { + ; RV32-LABEL: name: shufflevector_nxv1i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: 
PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i16_0() { + ; RV32-LABEL: name: shufflevector_nxv2i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: 
PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i16_1() { + ; RV32-LABEL: name: shufflevector_nxv2i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET 
implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i16_0() { + ; RV32-LABEL: name: shufflevector_nxv4i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i16_1() { + ; RV32-LABEL: name: shufflevector_nxv4i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define 
@shufflevector_nxv4i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i16_0() { + ; RV32-LABEL: name: shufflevector_nxv8i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + 
+define @shufflevector_nxv8i16_1() { + ; RV32-LABEL: name: shufflevector_nxv8i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, 
zeroinitializer + ret %b +} + +define @shufflevector_nxv16i16_0() { + ; RV32-LABEL: name: shufflevector_nxv16i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i16_1() { + ; RV32-LABEL: name: shufflevector_nxv16i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i16_2( %a) { + ; 
RV32-LABEL: name: shufflevector_nxv16i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i32_0() { + ; RV32-LABEL: name: shufflevector_nxv1i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define 
@shufflevector_nxv1i32_1() { + ; RV32-LABEL: name: shufflevector_nxv1i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define 
@shufflevector_nxv2i32_0() { + ; RV32-LABEL: name: shufflevector_nxv2i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i32_1() { + ; RV32-LABEL: name: shufflevector_nxv2i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i32_2 + ; RV32: bb.1 
(%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i32_0() { + ; RV32-LABEL: name: shufflevector_nxv4i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i32_1() { + ; RV32-LABEL: name: shufflevector_nxv4i32_1 + ; RV32: 
bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i32_0() { + ; RV32-LABEL: name: 
shufflevector_nxv8i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i32_1() { + ; RV32-LABEL: name: shufflevector_nxv8i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + 
; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i32_0() { + ; RV32-LABEL: name: shufflevector_nxv16i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i32_1() { + ; RV32-LABEL: name: shufflevector_nxv16i32_1 + ; RV32: bb.1 (%ir-block.0): + ; 
RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i64_0() { + ; RV32-LABEL: name: shufflevector_nxv1i64_0 + ; RV32: 
bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i64_1() { + ; RV32-LABEL: name: shufflevector_nxv1i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i64_0() { + ; RV32-LABEL: name: shufflevector_nxv2i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i64_1() { + ; RV32-LABEL: name: shufflevector_nxv2i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; 
RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i64_0() { + ; RV32-LABEL: name: shufflevector_nxv4i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() 
= G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i64_1() { + ; RV32-LABEL: name: shufflevector_nxv4i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i64_0() { + ; RV32-LABEL: name: shufflevector_nxv8i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i64_1() { + ; RV32-LABEL: name: shufflevector_nxv8i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i64_0() { + ; RV32-LABEL: name: shufflevector_nxv16i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i64_1() { + ; RV32-LABEL: name: shufflevector_nxv16i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8, $v16m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV32-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8, $v16m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV64-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + + + diff --git 
a/llvm/test/MachineVerifier/test_g_splat_vector.mir b/llvm/test/MachineVerifier/test_g_splat_vector.mir new file mode 100644 index 00000000000000..0d1d8a3e6dcc64 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_splat_vector.mir @@ -0,0 +1,27 @@ +# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: g_splat_vector +tracksRegLiveness: true +liveins: +body: | + bb.0: + %0:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s32>) = G_IMPLICIT_DEF + %2:_() = G_IMPLICIT_DEF + + ; CHECK: Destination type must be a scalable vector + %3:_(s32) = G_SPLAT_VECTOR %0 + + ; CHECK: Destination type must be a scalable vector + %4:_(<2 x s32>) = G_SPLAT_VECTOR %0 + + ; CHECK: Source type must be a scalar + %5:_() = G_SPLAT_VECTOR %1 + + ; CHECK: Source type must be a scalar + %6:_() = G_SPLAT_VECTOR %2 + + ; CHECK: Element type of the destination must be the same type as the source type + %7:_() = G_SPLAT_VECTOR %0 +... 
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 73837279701a97..33155d2c9a9642 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -147,9 +147,9 @@ TEST_F(AArch64GISelMITest, LowerRotatesVector) { LLT S32 = LLT::scalar(32); LLT V4S32 = LLT::fixed_vector(4, S32); auto SrcTrunc = B.buildTrunc(S32, Copies[0]); - auto Src = B.buildSplatVector(V4S32, SrcTrunc); + auto Src = B.buildSplatBuildVector(V4S32, SrcTrunc); auto AmtTrunc = B.buildTrunc(S32, Copies[1]); - auto Amt = B.buildSplatVector(V4S32, AmtTrunc); + auto Amt = B.buildSplatBuildVector(V4S32, AmtTrunc); auto ROTR = B.buildInstr(TargetOpcode::G_ROTR, {V4S32}, {Src, Amt}); AInfo Info(MF->getSubtarget()); diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index f52e49df0bcdee..59a86fa5646f36 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -61,7 +61,7 @@ TEST_F(AArch64GISelMITest, MatchIntConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); int64_t Cst; EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_ICstOrSplat(Cst))); EXPECT_EQ(Cst, 42); @@ -625,7 +625,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_SpecificICstSplat(42))); @@ -655,7 +655,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantOrSplat) { LLT v4s64 = 
LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE( From 3239b4dcfebbaa3eeaff9258893a6674050d8354 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Thu, 7 Mar 2024 08:53:18 -0600 Subject: [PATCH 040/158] [lldb][test] Enforce `pexpect` system availability by default (#84270) This switches the default of `LLDB_TEST_USE_VENDOR_PACKAGES` from `ON` to `OFF` in preparation for eventually deleting it. All known LLDB buildbots have this package installed, so flipping the default will uncover any other users. If this breaks anything, the preferred fix is to install `pexpect` on the host system. The second fix is to build with cmake option `-DLLDB_TEST_USE_VENDOR_PACKAGES=ON` as a temporary measure until `pexpect` can be installed. If neither of those work, reverting this patch is OK. --- lldb/cmake/modules/LLDBConfig.cmake | 2 +- lldb/test/CMakeLists.txt | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 93c8ffe4b7d8a0..5d62213c3f5838 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -68,7 +68,7 @@ option(LLDB_SKIP_DSYM "Whether to skip generating a dSYM when installing lldb." option(LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS "Fail to configure if certain requirements are not met for testing." OFF) option(LLDB_TEST_USE_VENDOR_PACKAGES - "Use packages from lldb/third_party/Python/module instead of system deps." ON) + "Use packages from lldb/third_party/Python/module instead of system deps." OFF) set(LLDB_GLOBAL_INIT_DIRECTORY "" CACHE STRING "Path to the global lldbinit directory. 
Relative paths are resolved relative to the diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 950643a5b8cc8e..0ef2eb1c42ce06 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -34,7 +34,9 @@ endif() # The "pexpect" package should come from the system environment, not from the # LLDB tree. However, we delay the deletion of it from the tree in case # users/buildbots don't have the package yet and need some time to install it. -if (NOT LLDB_TEST_USE_VENDOR_PACKAGES) +# Windows is configured to skip all pexpect tests, and guards all +# "import pexpect" calls, so we do not need pexpect installed there. +if (NOT LLDB_TEST_USE_VENDOR_PACKAGES AND NOT WIN32) unset(PY_pexpect_FOUND CACHE) lldb_find_python_module(pexpect) if (NOT PY_pexpect_FOUND) From 03588a27261f7ebea15af49268d2ec901fe1979e Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 7 Mar 2024 14:51:23 +0000 Subject: [PATCH 041/158] [lldb][test][FreeBSD] xfail TestPlatformConnect on AArch64 Details in the linked issue. Might fail on other architectures but I can't confirm, they can add to this if it does. --- .../API/commands/platform/connect/TestPlatformConnect.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lldb/test/API/commands/platform/connect/TestPlatformConnect.py b/lldb/test/API/commands/platform/connect/TestPlatformConnect.py index 6a0f036c007079..fc6c2ee98df44d 100644 --- a/lldb/test/API/commands/platform/connect/TestPlatformConnect.py +++ b/lldb/test/API/commands/platform/connect/TestPlatformConnect.py @@ -13,6 +13,13 @@ class TestPlatformProcessConnect(TestBase): @expectedFailureAll(hostoslist=["windows"], triple=".*-android") @skipIfDarwin # lldb-server not found correctly @expectedFailureAll(oslist=["windows"]) # process modules not loaded + # lldb-server platform times out waiting for the gdbserver port number to be + # written to the pipe, yet it seems the gdbserver already has written it. 
+ @expectedFailureAll( + archs=["aarch64"], + oslist=["freebsd"], + bugnumber="https://github.com/llvm/llvm-project/issues/84327", + ) @add_test_categories(["lldb-server"]) def test_platform_process_connect(self): self.build() From 9e0f5909d0af3911b19bb1f97fb400c3ce431f63 Mon Sep 17 00:00:00 2001 From: SahilPatidar Date: Thu, 7 Mar 2024 20:45:17 +0530 Subject: [PATCH 042/158] [DAG] Fix Failure to reassociate SMAX/SMIN/UMAX/UMIN (#82175) Resolve #58110 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 + .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 84 ++++++------ .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 121 +++++++++--------- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 30 ++--- llvm/test/CodeGen/X86/combine-smin.ll | 6 - llvm/test/CodeGen/X86/combine-umax.ll | 8 +- llvm/test/CodeGen/X86/combine-umin.ll | 8 +- 7 files changed, 127 insertions(+), 134 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cdcb7114640471..5476ef87971436 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5562,6 +5562,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; + // reassociate minmax + if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags())) + return RMINMAX; + // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. 
if (!TLI.isOperationLegal(Opcode, VT) && diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 2ae031798f5bd6..2ae058128eaa00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -549,36 +549,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: .LBB42_2: -; CHECK-RV32-NEXT: mul a5, a3, a2 -; CHECK-RV32-NEXT: addi a6, a4, -32 -; CHECK-RV32-NEXT: sltu a4, a4, a6 -; CHECK-RV32-NEXT: addi a4, a4, -1 -; CHECK-RV32-NEXT: and a6, a4, a6 -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: add a5, a1, a5 -; CHECK-RV32-NEXT: bltu a6, a4, .LBB42_4 +; CHECK-RV32-NEXT: mul a6, a3, a2 +; CHECK-RV32-NEXT: addi a5, a4, -32 +; CHECK-RV32-NEXT: sltu a7, a4, a5 +; CHECK-RV32-NEXT: addi a7, a7, -1 +; CHECK-RV32-NEXT: and a7, a7, a5 +; CHECK-RV32-NEXT: li a5, 16 +; CHECK-RV32-NEXT: add a6, a1, a6 +; CHECK-RV32-NEXT: bltu a7, a5, .LBB42_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: li a6, 16 +; CHECK-RV32-NEXT: li a7, 16 ; CHECK-RV32-NEXT: .LBB42_4: ; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV32-NEXT: addi a5, a3, -16 -; CHECK-RV32-NEXT: sltu a6, a3, a5 -; CHECK-RV32-NEXT: addi a6, a6, -1 -; CHECK-RV32-NEXT: and a5, a6, a5 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_6 +; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t +; CHECK-RV32-NEXT: addi a6, a3, -16 +; CHECK-RV32-NEXT: sltu a3, a3, a6 +; CHECK-RV32-NEXT: addi a3, a3, -1 +; CHECK-RV32-NEXT: and a3, a3, a6 +; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: li a3, 16 +; CHECK-RV32-NEXT: li a4, 16 ; 
CHECK-RV32-NEXT: .LBB42_6: -; CHECK-RV32-NEXT: mul a4, a3, a2 -; CHECK-RV32-NEXT: add a4, a1, a4 +; CHECK-RV32-NEXT: mul a5, a4, a2 +; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -599,36 +599,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: li a4, 32 ; CHECK-RV64-NEXT: .LBB42_2: -; CHECK-RV64-NEXT: mul a5, a4, a2 -; CHECK-RV64-NEXT: addi a6, a3, -32 -; CHECK-RV64-NEXT: sltu a3, a3, a6 -; CHECK-RV64-NEXT: addi a3, a3, -1 -; CHECK-RV64-NEXT: and a6, a3, a6 -; CHECK-RV64-NEXT: li a3, 16 -; CHECK-RV64-NEXT: add a5, a1, a5 -; CHECK-RV64-NEXT: bltu a6, a3, .LBB42_4 +; CHECK-RV64-NEXT: mul a6, a4, a2 +; CHECK-RV64-NEXT: addi a5, a3, -32 +; CHECK-RV64-NEXT: sltu a7, a3, a5 +; CHECK-RV64-NEXT: addi a7, a7, -1 +; CHECK-RV64-NEXT: and a7, a7, a5 +; CHECK-RV64-NEXT: li a5, 16 +; CHECK-RV64-NEXT: add a6, a1, a6 +; CHECK-RV64-NEXT: bltu a7, a5, .LBB42_4 ; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: li a6, 16 +; CHECK-RV64-NEXT: li a7, 16 ; CHECK-RV64-NEXT: .LBB42_4: ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV64-NEXT: addi a5, a4, -16 -; CHECK-RV64-NEXT: sltu a6, a4, a5 -; CHECK-RV64-NEXT: addi a6, a6, -1 -; CHECK-RV64-NEXT: and a5, a6, a5 -; CHECK-RV64-NEXT: bltu a4, a3, .LBB42_6 +; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV64-NEXT: 
vlse64.v v16, (a6), a2, v0.t +; CHECK-RV64-NEXT: addi a6, a4, -16 +; CHECK-RV64-NEXT: sltu a4, a4, a6 +; CHECK-RV64-NEXT: addi a4, a4, -1 +; CHECK-RV64-NEXT: and a4, a4, a6 +; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_6 ; CHECK-RV64-NEXT: # %bb.5: -; CHECK-RV64-NEXT: li a4, 16 +; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB42_6: -; CHECK-RV64-NEXT: mul a3, a4, a2 -; CHECK-RV64-NEXT: add a3, a1, a3 +; CHECK-RV64-NEXT: mul a5, a3, a2 +; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index e7b74737239154..4f16ce28bbb7e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -310,23 +310,24 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a7, 64 +; CHECK-NEXT: li a6, 64 ; CHECK-NEXT: .LBB16_4: ; CHECK-NEXT: addi a5, a1, 384 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: addi a6, a7, -32 -; CHECK-NEXT: sltu t0, 
a7, a6 -; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: and a6, t0, a6 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: addi t0, a6, -32 +; CHECK-NEXT: sltu a6, a6, t0 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, t0 ; CHECK-NEXT: addi t0, a6, -16 ; CHECK-NEXT: sltu t1, a6, t0 ; CHECK-NEXT: addi t1, t1, -1 @@ -364,14 +365,15 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: bltu a4, a3, .LBB16_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: li a5, 32 ; CHECK-NEXT: .LBB16_8: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a4, -16 -; CHECK-NEXT: sltu a5, a4, a1 +; CHECK-NEXT: addi a1, a5, -16 +; CHECK-NEXT: sltu a5, a5, a1 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a1, a5, a1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -387,62 +389,63 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: bltu a7, a3, .LBB16_12 ; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a7, 32 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v 
v24, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 56 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 56 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 56 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 56 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 48 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 48 +; CHECK-NEXT: mul a4, a4, a5 
+; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v8, v0 ; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a7, -16 -; CHECK-NEXT: sltu a4, a7, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a1, -16 +; CHECK-NEXT: sltu a1, a1, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index bb213c9276a3a9..618b875be56651 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -418,20 +418,20 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: sltu a5, a3, a4 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: sltu a3, a3, a4 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5), v0.t -; CHECK-NEXT: addi a4, a2, -32 -; CHECK-NEXT: sltu a2, a2, a4 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a4, a2, a4 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: 
bltu a4, a2, .LBB32_4 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t +; CHECK-NEXT: addi a3, a2, -32 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a4, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: @@ -440,11 +440,11 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t -; CHECK-NEXT: bltu a3, a2, .LBB32_6 +; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a1), v0.t ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll index 87ae495f945e0a..b58934256a2092 100644 --- a/llvm/test/CodeGen/X86/combine-smin.ll +++ b/llvm/test/CodeGen/X86/combine-smin.ll @@ -70,9 +70,6 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE2-LABEL: test_v16i8_reassociation: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -81,21 +78,18 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE41: # %bb.0: ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: pminsb %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: test_v16i8_reassociation: ; SSE42: # %bb.0: ; SSE42-NEXT: pxor %xmm1, %xmm1 ; SSE42-NEXT: pminsb %xmm1, %xmm0 -; SSE42-NEXT: pminsb %xmm1, %xmm0 ; SSE42-NEXT: retq ; ; AVX-LABEL: test_v16i8_reassociation: ; AVX: # 
%bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) %2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer) diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll index 52bb9ee7fcb9f5..25f8ec891a2472 100644 --- a/llvm/test/CodeGen/X86/combine-umax.ll +++ b/llvm/test/CodeGen/X86/combine-umax.ll @@ -45,16 +45,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE-LABEL: test_v16i8_reassociation: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; SSE-NEXT: pmaxub %xmm1, %xmm0 -; SSE-NEXT: pmaxub %xmm1, %xmm0 +; SSE-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_reassociation: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> ) %2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> ) diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll index 5b3b7f942805d9..76dbcb50bf8c7c 100644 --- a/llvm/test/CodeGen/X86/combine-umin.ll +++ b/llvm/test/CodeGen/X86/combine-umin.ll @@ -62,16 +62,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE-LABEL: test_v16i8_reassociation: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; SSE-NEXT: pminub %xmm1, %xmm0 -; SSE-NEXT: pminub %xmm1, %xmm0 +; SSE-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; 
SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_reassociation: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> ) %2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> ) From 552da2484390bb002522fc18124ac9fc19ab4b59 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 7 Mar 2024 10:16:31 -0500 Subject: [PATCH 043/158] Revert "[GISEL] Add IRTranslation for shufflevector on scalable vector types" (#84330) Reverts llvm/llvm-project#80378 causing Buildbot failures that did not show up with check-llvm or CI. --- llvm/docs/GlobalISel/GenericOpcode.rst | 5 - .../CodeGen/GlobalISel/MachineIRBuilder.h | 12 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 - llvm/include/llvm/Target/GenericOpcodes.td | 7 - llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 4 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 27 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 16 +- llvm/lib/CodeGen/MachineVerifier.cpp | 18 - llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- .../GlobalISel/legalizer-info-validation.mir | 3 - .../GlobalISel/irtranslator/shufflevector.ll | 1774 ----------------- .../MachineVerifier/test_g_splat_vector.mir | 27 - .../GlobalISel/LegalizerHelperTest.cpp | 4 +- .../CodeGen/GlobalISel/PatternMatchTest.cpp | 6 +- 15 files changed, 21 insertions(+), 1890 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll delete mode 100644 llvm/test/MachineVerifier/test_g_splat_vector.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index dda367607d0432..33b0152bd7b49c 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -639,11 
+639,6 @@ Concatenate two vectors and shuffle the elements according to the mask operand. The mask operand should be an IR Constant which exactly matches the corresponding mask for the IR shufflevector instruction. -G_SPLAT_VECTOR -^^^^^^^^^^^^^^^^ - -Create a vector where all elements are the scalar from the source operand. - Vector Reduction Operations --------------------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 6762b1b360d5e8..1387a0a37561c4 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1063,7 +1063,8 @@ class MachineIRBuilder { /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements - MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src); + MachineInstrBuilder buildSplatVector(const DstOp &Res, + const SrcOp &Src); /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ... /// @@ -1098,15 +1099,6 @@ class MachineIRBuilder { MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef Mask); - /// Build and insert \p Res = G_SPLAT_VECTOR \p Val - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res must be a generic virtual register with vector type. - /// \pre \p Val must be a generic virtual register with scalar type. - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val); - /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ... 
/// /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 94fba491148b2e..6aded2ceebe13a 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -736,9 +736,6 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) /// Generic shufflevector. HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) -/// Generic splatvector. -HANDLE_TARGET_OPCODE(G_SPLAT_VECTOR) - /// Generic count trailing zeroes. HANDLE_TARGET_OPCODE(G_CTTZ) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index d967885aa2d758..d2036e478d18f2 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1450,13 +1450,6 @@ def G_SHUFFLE_VECTOR: GenericInstruction { let hasSideEffects = false; } -// Generic splatvector. -def G_SPLAT_VECTOR: GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$val); - let hasSideEffects = false; -} - //------------------------------------------------------------------------------ // Vector reductions //------------------------------------------------------------------------------ diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 1869e0d41a51f6..64e2d517e3b9c4 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -309,7 +309,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, // For vectors, CSE the element only for now. 
LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val)); + return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); @@ -336,7 +336,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, // For vectors, CSE the element only for now. LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val)); + return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 365870f540daeb..7c986dbbc2c7c8 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1598,10 +1598,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // We might need to splat the base pointer into a vector if the offsets // are vectors. 
if (WantSplatVector && !PtrTy.isVector()) { - BaseReg = MIRBuilder - .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy), - BaseReg) - .getReg(0); + BaseReg = + MIRBuilder + .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) + .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); OffsetIRTy = DL->getIndexType(PtrIRTy); @@ -1639,10 +1639,8 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { if (!IdxTy.isVector() && WantSplatVector) { - IdxReg = MIRBuilder - .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy), - IdxReg) - .getReg(0); + IdxReg = MIRBuilder.buildSplatVector( + OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); } IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); @@ -2999,19 +2997,6 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { - // A ShuffleVector that has operates on scalable vectors is a splat vector - // where the value of the splat vector is the 0th element of the first - // operand, since the index mask operand is the zeroinitializer (undef and - // poison are treated as zeroinitializer here). 
- if (U.getOperand(0)->getType()->isScalableTy()) { - Value *Op0 = U.getOperand(0); - auto SplatVal = MIRBuilder.buildExtractVectorElementConstant( - LLT::scalar(Op0->getType()->getScalarSizeInBits()), - getOrCreateVReg(*Op0), 0); - MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal); - return true; - } - ArrayRef Mask; if (auto *SVI = dyn_cast(&U)) Mask = SVI->getShuffleMask(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 2ec47f72aca39a..1d016e684c48f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8391,7 +8391,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { // For vector types create a G_BUILD_VECTOR. if (Ty.isVector()) - Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0); + Val = MIB.buildSplatVector(Ty, Val).getReg(0); return Val; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index a5a136e2effc60..cdd605a5221ad8 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -326,7 +326,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, auto Const = buildInstr(TargetOpcode::G_CONSTANT) .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addCImm(&Val); - return buildSplatBuildVector(Res, Const); + return buildSplatVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_CONSTANT); @@ -363,7 +363,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addFPImm(&Val); - return buildSplatBuildVector(Res, Const); + return buildSplatVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); @@ -711,8 +711,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } 
-MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res, - const SrcOp &Src) { +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } @@ -742,14 +742,6 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); } -MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, - const SrcOp &Src) { - LLT DstTy = Res.getLLTTy(*getMRI()); - assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && - "Expected Src to match Dst elt ty"); - return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); -} - MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index ecb3bd33bdfd49..1d0757c5d7f5f5 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1640,24 +1640,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - - case TargetOpcode::G_SPLAT_VECTOR: { - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); - LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); - - if (!DstTy.isScalableVector()) - report("Destination type must be a scalable vector", MI); - - if (!SrcTy.isScalar()) - report("Source type must be a scalar", MI); - - if (DstTy.getScalarType() != SrcTy) - report("Element type of the destination must be the same type as the " - "source type", - MI); - - break; - } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4713bd605c243b..750d70c03eabd7 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -20920,8 +20920,7 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { unsigned Op = Inst.getOpcode(); if (Op == Instruction::Add || Op == Instruction::Sub || Op == Instruction::And || Op == Instruction::Or || - Op == Instruction::Xor || Op == Instruction::InsertElement || - Op == Instruction::Xor || Op == Instruction::ShuffleVector) + Op == Instruction::Xor || Op == Instruction::InsertElement) return false; if (Inst.getType()->isScalableTy()) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 7774158e15ec58..d87704cf45d5d5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -625,9 +625,6 @@ # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_SPLAT_VECTOR (opcode 217): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll deleted file mode 100644 index df7778899b0d09..00000000000000 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll +++ /dev/null @@ -1,1774 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \ -; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \ -; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s - -define @shufflevector_nxv1i1_0() { - ; RV32-LABEL: name: shufflevector_nxv1i1_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv1i1_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv1i1_1() { - ; RV32-LABEL: name: shufflevector_nxv1i1_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = 
G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv1i1_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv1i1_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv1i1_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v0 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv1i1_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v0 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv2i1_0() { - ; RV32-LABEL: name: shufflevector_nxv2i1_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = 
G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv2i1_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv2i1_1() { - ; RV32-LABEL: name: shufflevector_nxv2i1_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv2i1_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv2i1_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv2i1_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v0 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: 
[[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv2i1_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v0 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv4i1_0() { - ; RV32-LABEL: name: shufflevector_nxv4i1_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv4i1_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv4i1_1() { - ; RV32-LABEL: name: shufflevector_nxv4i1_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; 
RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv4i1_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv4i1_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv4i1_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v0 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv4i1_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v0 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv8i1_0() { - ; RV32-LABEL: name: shufflevector_nxv8i1_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = 
COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv8i1_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv8i1_1() { - ; RV32-LABEL: name: shufflevector_nxv8i1_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv8i1_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv8i1_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv8i1_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v0 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; 
RV64-LABEL: name: shufflevector_nxv8i1_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v0 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv16i1_0() { - ; RV32-LABEL: name: shufflevector_nxv16i1_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv16i1_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv16i1_1() { - ; RV32-LABEL: name: shufflevector_nxv16i1_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv16i1_1 - ; RV64: bb.1 
(%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv16i1_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv16i1_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v0 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v0 - ; - ; RV64-LABEL: name: shufflevector_nxv16i1_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v0 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) - ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v0 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv1i8_0() { - ; RV32-LABEL: name: shufflevector_nxv1i8_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i8_0 - ; RV64: bb.1 (%ir-block.0): 
- ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv1i8_1() { - ; RV32-LABEL: name: shufflevector_nxv1i8_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i8_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv1i8_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv1i8_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i8_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv2i8_0() { - ; RV32-LABEL: name: shufflevector_nxv2i8_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i8_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv2i8_1() { - ; RV32-LABEL: name: shufflevector_nxv2i8_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i8_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv2i8_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv2i8_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i8_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv4i8_0() { - ; RV32-LABEL: name: shufflevector_nxv4i8_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i8_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv4i8_1() { - ; RV32-LABEL: name: shufflevector_nxv4i8_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i8_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv4i8_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv4i8_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i8_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = 
G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv8i8_0() { - ; RV32-LABEL: name: shufflevector_nxv8i8_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i8_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv8i8_1() { - ; RV32-LABEL: name: shufflevector_nxv8i8_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i8_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv8i8_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv8i8_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i8_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv16i8_0() { - ; RV32-LABEL: name: shufflevector_nxv16i8_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv16i8_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv16i8_1() { - ; RV32-LABEL: name: shufflevector_nxv16i8_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv16i8_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv16i8_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv16i8_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m2 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv16i8_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m2 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s8) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv1i16_0() { - ; RV32-LABEL: name: shufflevector_nxv1i16_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i16_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv1i16_1() { - ; RV32-LABEL: name: shufflevector_nxv1i16_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i16_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: 
PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv1i16_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv1i16_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i16_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv2i16_0() { - ; RV32-LABEL: name: shufflevector_nxv2i16_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i16_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: 
PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv2i16_1() { - ; RV32-LABEL: name: shufflevector_nxv2i16_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i16_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv2i16_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv2i16_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i16_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET 
implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv4i16_0() { - ; RV32-LABEL: name: shufflevector_nxv4i16_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i16_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv4i16_1() { - ; RV32-LABEL: name: shufflevector_nxv4i16_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i16_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define 
@shufflevector_nxv4i16_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv4i16_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv4i16_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv8i16_0() { - ; RV32-LABEL: name: shufflevector_nxv8i16_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv8i16_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector poison, poison, poison - ret %a -} - 
-define @shufflevector_nxv8i16_1() { - ; RV32-LABEL: name: shufflevector_nxv8i16_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv8i16_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv8i16_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv8i16_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m2 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv8i16_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m2 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %b = shufflevector %a , poison, 
zeroinitializer - ret %b -} - -define @shufflevector_nxv16i16_0() { - ; RV32-LABEL: name: shufflevector_nxv16i16_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv16i16_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv16i16_1() { - ; RV32-LABEL: name: shufflevector_nxv16i16_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv16i16_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv16i16_2( %a) { - ; 
RV32-LABEL: name: shufflevector_nxv16i16_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m4 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv16i16_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m4 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv1i32_0() { - ; RV32-LABEL: name: shufflevector_nxv1i32_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i32_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define 
@shufflevector_nxv1i32_1() { - ; RV32-LABEL: name: shufflevector_nxv1i32_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i32_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv1i32_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv1i32_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i32_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define 
@shufflevector_nxv2i32_0() { - ; RV32-LABEL: name: shufflevector_nxv2i32_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i32_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv2i32_1() { - ; RV32-LABEL: name: shufflevector_nxv2i32_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i32_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv2i32_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv2i32_2 - ; RV32: bb.1 
(%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv2i32_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv4i32_0() { - ; RV32-LABEL: name: shufflevector_nxv4i32_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv4i32_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv4i32_1() { - ; RV32-LABEL: name: shufflevector_nxv4i32_1 - ; RV32: 
bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv4i32_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv4i32_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv4i32_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m2 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv4i32_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m2 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv8i32_0() { - ; RV32-LABEL: name: 
shufflevector_nxv8i32_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv8i32_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv8i32_1() { - ; RV32-LABEL: name: shufflevector_nxv8i32_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv8i32_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv8i32_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv8i32_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m4 - 
; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv8i32_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m4 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv16i32_0() { - ; RV32-LABEL: name: shufflevector_nxv16i32_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i32_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv16i32_1() { - ; RV32-LABEL: name: shufflevector_nxv16i32_1 - ; RV32: bb.1 (%ir-block.0): - ; 
RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i32_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv16i32_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv16i32_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i32_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv1i64_0() { - ; RV32-LABEL: name: shufflevector_nxv1i64_0 - ; RV32: 
bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i64_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv1i64_1() { - ; RV32-LABEL: name: shufflevector_nxv1i64_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i64_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv1i64_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv1i64_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8 - ; - ; RV64-LABEL: name: shufflevector_nxv1i64_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv2i64_0() { - ; RV32-LABEL: name: shufflevector_nxv2i64_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv2i64_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv2i64_1() { - ; RV32-LABEL: name: shufflevector_nxv2i64_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; 
RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv2i64_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv2i64_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv2i64_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m2 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m2 - ; - ; RV64-LABEL: name: shufflevector_nxv2i64_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m2 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m2 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv4i64_0() { - ; RV32-LABEL: name: shufflevector_nxv4i64_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() 
= G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv4i64_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv4i64_1() { - ; RV32-LABEL: name: shufflevector_nxv4i64_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv4i64_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv4i64_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv4i64_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m4 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m4 - ; - ; RV64-LABEL: name: shufflevector_nxv4i64_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m4 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m4 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv8i64_0() { - ; RV32-LABEL: name: shufflevector_nxv8i64_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i64_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv8i64_1() { - ; RV32-LABEL: name: shufflevector_nxv8i64_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i64_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv8i64_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv8i64_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV32-NEXT: PseudoRET implicit $v8m8 - ; - ; RV64-LABEL: name: shufflevector_nxv8i64_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() - ; RV64-NEXT: PseudoRET implicit $v8m8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - -define @shufflevector_nxv16i64_0() { - ; RV32-LABEL: name: shufflevector_nxv16i64_0 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV32-NEXT: $v8m8 = COPY [[UV]]() - ; RV32-NEXT: $v16m8 = COPY [[UV1]]() - ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i64_0 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV64-NEXT: $v8m8 = COPY [[UV]]() - ; RV64-NEXT: $v16m8 = COPY [[UV1]]() - ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - %a = shufflevector poison, poison, poison - ret %a -} - -define @shufflevector_nxv16i64_1() { - ; RV32-LABEL: name: shufflevector_nxv16i64_1 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV32-NEXT: $v8m8 = COPY [[UV]]() - ; RV32-NEXT: $v16m8 = COPY [[UV1]]() - ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i64_1 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s64) - ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV64-NEXT: $v8m8 = COPY [[UV]]() - ; RV64-NEXT: $v16m8 = COPY [[UV1]]() - ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - %a = shufflevector undef, undef, undef - ret %a -} - -define @shufflevector_nxv16i64_2( %a) { - ; RV32-LABEL: name: shufflevector_nxv16i64_2 - ; RV32: bb.1 (%ir-block.0): - ; RV32-NEXT: liveins: $v8m8, $v16m8 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV32-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 - ; RV32-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) - ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV32-NEXT: $v8m8 = COPY [[UV]]() - ; RV32-NEXT: $v16m8 = COPY [[UV1]]() - ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - ; - ; RV64-LABEL: name: shufflevector_nxv16i64_2 - ; RV64: bb.1 (%ir-block.0): - ; RV64-NEXT: liveins: $v8m8, $v16m8 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV64-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 - ; RV64-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() - ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) - ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) - ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() - ; RV64-NEXT: $v8m8 = COPY [[UV]]() - ; RV64-NEXT: $v16m8 = COPY [[UV1]]() - ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 - %b = shufflevector %a , poison, zeroinitializer - ret %b -} - - - diff --git 
a/llvm/test/MachineVerifier/test_g_splat_vector.mir b/llvm/test/MachineVerifier/test_g_splat_vector.mir deleted file mode 100644 index 0d1d8a3e6dcc64..00000000000000 --- a/llvm/test/MachineVerifier/test_g_splat_vector.mir +++ /dev/null @@ -1,27 +0,0 @@ -# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s -# REQUIRES: aarch64-registered-target ---- -name: g_splat_vector -tracksRegLiveness: true -liveins: -body: | - bb.0: - %0:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_IMPLICIT_DEF - %2:_() = G_IMPLICIT_DEF - - ; CHECK: Destination type must be a scalable vector - %3:_(s32) = G_SPLAT_VECTOR %0 - - ; CHECK: Destination type must be a scalable vector - %4:_(<2 x s32>) = G_SPLAT_VECTOR %0 - - ; CHECK: Source type must be a scalar - %5:_() = G_SPLAT_VECTOR %1 - - ; CHECK: Source type must be a scalar - %6:_() = G_SPLAT_VECTOR %2 - - ; CHECK: Element type of the destination must be the same type as the source type - %7:_() = G_SPLAT_VECTOR %0 -... 
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 33155d2c9a9642..73837279701a97 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -147,9 +147,9 @@ TEST_F(AArch64GISelMITest, LowerRotatesVector) { LLT S32 = LLT::scalar(32); LLT V4S32 = LLT::fixed_vector(4, S32); auto SrcTrunc = B.buildTrunc(S32, Copies[0]); - auto Src = B.buildSplatBuildVector(V4S32, SrcTrunc); + auto Src = B.buildSplatVector(V4S32, SrcTrunc); auto AmtTrunc = B.buildTrunc(S32, Copies[1]); - auto Amt = B.buildSplatBuildVector(V4S32, AmtTrunc); + auto Amt = B.buildSplatVector(V4S32, AmtTrunc); auto ROTR = B.buildInstr(TargetOpcode::G_ROTR, {V4S32}, {Src, Amt}); AInfo Info(MF->getSubtarget()); diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index 59a86fa5646f36..f52e49df0bcdee 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -61,7 +61,7 @@ TEST_F(AArch64GISelMITest, MatchIntConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); int64_t Cst; EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_ICstOrSplat(Cst))); EXPECT_EQ(Cst, 42); @@ -625,7 +625,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_SpecificICstSplat(42))); @@ -655,7 +655,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantOrSplat) { LLT v4s64 = 
LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE( From 4ce52e2d576937fe930294cae883a0daa17eeced Mon Sep 17 00:00:00 2001 From: Alexey Bataev <5361294+alexey-bataev@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:36:41 -0500 Subject: [PATCH 044/158] [SLP]Improve minbitwidth analysis. This improves overall analysis for minbitwidth in SLP. It allows to analyze the trees with store/insertelement root nodes. Also, instead of using single minbitwidth, detected from the very first analysis stage, it tries to detect the best one for each trunc/ext subtree in the graph and use it for the subtree. Results in better code and less vector register pressure. Metric: size..text Program size..text results results0 diff test-suite :: SingleSource/Benchmarks/Adobe-C++/simple_types_loop_invariant.test 92549.00 92609.00 0.1% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 663381.00 663493.00 0.0% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 663381.00 663493.00 0.0% test-suite :: MultiSource/Benchmarks/Bullet/bullet.test 307182.00 307214.00 0.0% test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1394420.00 1394484.00 0.0% test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1394420.00 1394484.00 0.0% test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 2040257.00 2040273.00 0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12396098.00 12395858.00 -0.0% test-suite :: External/SPEC/CINT2006/445.gobmk/445.gobmk.test 909944.00 909768.00 -0.0% SingleSource/Benchmarks/Adobe-C++/simple_types_loop_invariant - 4 scalar instructions remain scalar (good). 
Spec2017/x264 - the whole function idct4x4dc is vectorized using <16 x i16> instead of <16 x i32>, also zext/trunc are removed. In other places last vector zext/sext removed and replaced by extractelement + scalar zext/sext pair. MultiSource/Benchmarks/Bullet/bullet - reduce or <4 x i32> replaced by reduce or <4 x i8> Spec2017/imagick - Removed extra zext from 2 packs of the operations. Spec2017/parest - Removed extra zext, replaced by extractelement+scalar zext Spec2017/blender - the whole bunch of vector zext/sext replaced by extractelement+scalar zext/sext, some extra code vectorized in smaller types. Spec2006/gobmk - fixed cost estimation, some small code remains scalar. Reviewers: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/84334 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 634 ++++++++++++------ .../SLPVectorizer/AArch64/ext-trunc.ll | 9 +- .../SLPVectorizer/AArch64/getelementptr2.ll | 4 +- .../SLPVectorizer/AArch64/reduce-add-i64.ll | 20 +- .../SLPVectorizer/RISCV/reductions.ll | 7 +- .../Transforms/SLPVectorizer/X86/PR35777.ll | 9 +- .../X86/int-bitcast-minbitwidth.ll | 2 +- ...minbitwidth-multiuse-with-insertelement.ll | 17 +- .../X86/minbitwidth-transformed-operand.ll | 21 +- .../SLPVectorizer/X86/minimum-sizes.ll | 43 +- .../SLPVectorizer/X86/phi-undef-input.ll | 24 +- .../Transforms/SLPVectorizer/X86/resched.ll | 32 +- .../X86/reused-reductions-with-minbitwidth.ll | 10 +- .../X86/store-insertelement-minbitwidth.ll | 22 +- .../SLPVectorizer/alt-cmp-vectorize.ll | 4 +- 15 files changed, 553 insertions(+), 305 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 36dc9094538ae9..1889bc09e85028 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1085,6 +1085,9 @@ class BoUpSLP { BS->clear(); } MinBWs.clear(); + ReductionBitWidth = 0; + CastMaxMinBWSizes.reset(); + TruncNodes.clear(); 
InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -2287,6 +2290,7 @@ class BoUpSLP { void clearReductionData() { AnalyzedReductionsRoots.clear(); AnalyzedReductionVals.clear(); + AnalyzedMinBWVals.clear(); } /// Checks if the given value is gathered in one of the nodes. bool isAnyGathered(const SmallDenseSet &Vals) const { @@ -2307,9 +2311,11 @@ class BoUpSLP { /// constant and to be demoted. Required to correctly identify constant nodes /// to be demoted. bool collectValuesToDemote( - Value *V, SmallVectorImpl &ToDemote, + Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, + SmallVectorImpl &ToDemote, DenseMap> &DemotedConsts, - SmallVectorImpl &Roots, DenseSet &Visited) const; + DenseSet &Visited, unsigned &MaxDepthLevel, + bool &IsProfitableToDemote) const; /// Check if the operands on the edges \p Edges of the \p UserTE allows /// reordering (i.e. the operands can be reordered because they have only one @@ -2375,6 +2381,10 @@ class BoUpSLP { /// \ returns the graph entry for the \p Idx operand of the \p E entry. const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const; + /// \returns Cast context for the given graph node. + TargetTransformInfo::CastContextHint + getCastContextHint(const TreeEntry &TE) const; + /// \returns the cost of the vectorizable entry. InstructionCost getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, @@ -2925,11 +2935,18 @@ class BoUpSLP { } assert(!BundleMember && "Bundle and VL out of sync"); } else { - MustGather.insert(VL.begin(), VL.end()); // Build a map for gathered scalars to the nodes where they are used. 
+ bool AllConstsOrCasts = true; for (Value *V : VL) - if (!isConstant(V)) + if (!isConstant(V)) { + auto *I = dyn_cast(V); + AllConstsOrCasts &= I && I->getType()->isIntegerTy(); ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last); + } + if (AllConstsOrCasts) + CastMaxMinBWSizes = + std::make_pair(std::numeric_limits::max(), 1); + MustGather.insert(VL.begin(), VL.end()); } if (UserTreeIdx.UserTE) @@ -3054,6 +3071,10 @@ class BoUpSLP { /// Set of hashes for the list of reduction values already being analyzed. DenseSet AnalyzedReductionVals; + /// Values, already been analyzed for mininmal bitwidth and found to be + /// non-profitable. + DenseSet AnalyzedMinBWVals; + /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User /// can be nullptr, it means that this Internal Scalar will be used later, @@ -3629,6 +3650,18 @@ class BoUpSLP { /// value must be signed-extended, rather than zero-extended, back to its /// original width. DenseMap> MinBWs; + + /// Final size of the reduced vector, if the current graph represents the + /// input for the reduction and it was possible to narrow the size of the + /// reduction. + unsigned ReductionBitWidth = 0; + + /// If the tree contains any zext/sext/trunc nodes, contains max-min pair of + /// type sizes, used in the tree. + std::optional> CastMaxMinBWSizes; + + /// Indices of the vectorized trunc nodes. 
+ DenseSet TruncNodes; }; } // end namespace slpvectorizer @@ -6539,8 +6572,29 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { + auto [PrevMaxBW, PrevMinBW] = CastMaxMinBWSizes.value_or( + std::make_pair(std::numeric_limits::min(), + std::numeric_limits::max())); + if (ShuffleOrOp == Instruction::ZExt || + ShuffleOrOp == Instruction::SExt) { + CastMaxMinBWSizes = std::make_pair( + std::max(DL->getTypeSizeInBits(VL0->getType()), + PrevMaxBW), + std::min( + DL->getTypeSizeInBits(VL0->getOperand(0)->getType()), + PrevMinBW)); + } else if (ShuffleOrOp == Instruction::Trunc) { + CastMaxMinBWSizes = std::make_pair( + std::max( + DL->getTypeSizeInBits(VL0->getOperand(0)->getType()), + PrevMaxBW), + std::min(DL->getTypeSizeInBits(VL0->getType()), + PrevMinBW)); + TruncNodes.insert(VectorizableTree.size()); + } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); TE->setOperandsInOrder(); @@ -8362,6 +8416,22 @@ const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E, return It->get(); } +TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const { + if (TE.State == TreeEntry::ScatterVectorize || + TE.State == TreeEntry::StridedVectorize) + return TTI::CastContextHint::GatherScatter; + if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::Load && + !TE.isAltShuffle()) { + if (TE.ReorderIndices.empty()) + return TTI::CastContextHint::Normal; + SmallVector Mask; + inversePermutation(TE.ReorderIndices, Mask); + if (ShuffleVectorInst::isReverseMask(Mask, Mask.size())) + return TTI::CastContextHint::Reversed; + } + return TTI::CastContextHint::None; +} + InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, SmallPtrSetImpl &CheckedExtracts) { @@ -8384,6 +8454,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef 
VectorizedVals, // If we have computed a smaller type for the expression, update VecTy so // that the costs will be accurate. auto It = MinBWs.find(E); + Type *OrigScalarTy = ScalarTy; if (It != MinBWs.end()) { ScalarTy = IntegerType::get(F->getContext(), It->second.first); VecTy = FixedVectorType::get(ScalarTy, VL.size()); @@ -8441,24 +8512,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, UsedScalars.set(I); } auto GetCastContextHint = [&](Value *V) { - if (const TreeEntry *OpTE = getTreeEntry(V)) { - if (OpTE->State == TreeEntry::ScatterVectorize || - OpTE->State == TreeEntry::StridedVectorize) - return TTI::CastContextHint::GatherScatter; - if (OpTE->State == TreeEntry::Vectorize && - OpTE->getOpcode() == Instruction::Load && !OpTE->isAltShuffle()) { - if (OpTE->ReorderIndices.empty()) - return TTI::CastContextHint::Normal; - SmallVector Mask; - inversePermutation(OpTE->ReorderIndices, Mask); - if (ShuffleVectorInst::isReverseMask(Mask, Mask.size())) - return TTI::CastContextHint::Reversed; - } - } else { - InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); - if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) - return TTI::CastContextHint::GatherScatter; - } + if (const TreeEntry *OpTE = getTreeEntry(V)) + return getCastContextHint(*OpTE); + InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); + if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) + return TTI::CastContextHint::GatherScatter; return TTI::CastContextHint::None; }; auto GetCostDiff = @@ -8507,8 +8565,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTI::CastContextHint CCH = GetCastContextHint(VL0); VecCost += TTI->getCastInstrCost(VecOpcode, UserVecTy, VecTy, CCH, CostKind); - ScalarCost += Sz * TTI->getCastInstrCost(VecOpcode, UserScalarTy, - ScalarTy, CCH, CostKind); } } } @@ -8525,7 +8581,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, 
InstructionCost ScalarCost = 0; InstructionCost VecCost = 0; std::tie(ScalarCost, VecCost) = getGEPCosts( - *TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, ScalarTy, VecTy); + *TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, OrigScalarTy, VecTy); LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost, "Calculated GEPs cost for Tree")); @@ -8572,7 +8628,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, NumElts = ATy->getNumElements(); else NumElts = AggregateTy->getStructNumElements(); - SrcVecTy = FixedVectorType::get(ScalarTy, NumElts); + SrcVecTy = FixedVectorType::get(OrigScalarTy, NumElts); } if (I->hasOneUse()) { Instruction *Ext = I->user_back(); @@ -8740,13 +8796,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, } } auto GetScalarCost = [&](unsigned Idx) -> InstructionCost { - // Do not count cost here if minimum bitwidth is in effect and it is just - // a bitcast (here it is just a noop). - if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast) - return TTI::TCC_Free; - auto *VI = VL0->getOpcode() == Opcode - ? cast(UniqueValues[Idx]) - : nullptr; + auto *VI = cast(UniqueValues[Idx]); return TTI->getCastInstrCost(Opcode, VL0->getType(), VL0->getOperand(0)->getType(), TTI::getCastContextHint(VI), CostKind, VI); @@ -8789,7 +8839,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ? 
CmpInst::BAD_FCMP_PREDICATE : CmpInst::BAD_ICMP_PREDICATE; - return TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, + return TTI->getCmpSelInstrCost(E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, CostKind, VI); }; @@ -8844,7 +8894,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(VI->getOperand(OpIdx)); SmallVector Operands(VI->operand_values()); - return TTI->getArithmeticInstrCost(ShuffleOrOp, ScalarTy, CostKind, + return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind, Op1Info, Op2Info, Operands, VI); }; auto GetVectorCost = [=](InstructionCost CommonCost) { @@ -8863,9 +8913,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, case Instruction::Load: { auto GetScalarCost = [&](unsigned Idx) { auto *VI = cast(UniqueValues[Idx]); - return TTI->getMemoryOpCost(Instruction::Load, ScalarTy, VI->getAlign(), - VI->getPointerAddressSpace(), CostKind, - TTI::OperandValueInfo(), VI); + return TTI->getMemoryOpCost(Instruction::Load, OrigScalarTy, + VI->getAlign(), VI->getPointerAddressSpace(), + CostKind, TTI::OperandValueInfo(), VI); }; auto *LI0 = cast(VL0); auto GetVectorCost = [&](InstructionCost CommonCost) { @@ -8908,9 +8958,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetScalarCost = [=](unsigned Idx) { auto *VI = cast(VL[Idx]); TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getValueOperand()); - return TTI->getMemoryOpCost(Instruction::Store, ScalarTy, VI->getAlign(), - VI->getPointerAddressSpace(), CostKind, - OpInfo, VI); + return TTI->getMemoryOpCost(Instruction::Store, OrigScalarTy, + VI->getAlign(), VI->getPointerAddressSpace(), + CostKind, OpInfo, VI); }; auto *BaseSI = cast(IsReorder ? VL[E->ReorderIndices.front()] : VL0); @@ -9772,6 +9822,44 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { Cost -= InsertCost; } + // Add the cost for reduced value resize (if required). 
+ if (ReductionBitWidth != 0) { + assert(UserIgnoreList && "Expected reduction tree."); + const TreeEntry &E = *VectorizableTree.front().get(); + auto It = MinBWs.find(&E); + if (It != MinBWs.end() && It->second.first != ReductionBitWidth) { + unsigned SrcSize = It->second.first; + unsigned DstSize = ReductionBitWidth; + unsigned Opcode = Instruction::Trunc; + if (SrcSize < DstSize) + Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt; + auto *SrcVecTy = + FixedVectorType::get(Builder.getIntNTy(SrcSize), E.getVectorFactor()); + auto *DstVecTy = + FixedVectorType::get(Builder.getIntNTy(DstSize), E.getVectorFactor()); + TTI::CastContextHint CCH = getCastContextHint(E); + InstructionCost CastCost; + switch (E.getOpcode()) { + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::Trunc: { + const TreeEntry *OpTE = getOperandEntry(&E, 0); + CCH = getCastContextHint(*OpTE); + break; + } + default: + break; + } + CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH, + TTI::TCK_RecipThroughput); + Cost += CastCost; + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost + << " for final resize for reduction from " << SrcVecTy + << " to " << DstVecTy << "\n"; + dbgs() << "SLP: Current total cost = " << Cost << "\n"); + } + } + #ifndef NDEBUG SmallString<256> Str; { @@ -9992,6 +10080,30 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // tree node for each gathered value - we have just a permutation of the // single vector. If we have 2 different sets, we're in situation where we // have a permutation of 2 input vectors. + // Filter out entries with larger bitwidth of elements. + Type *ScalarTy = VL.front()->getType(); + unsigned BitWidth = 0; + if (ScalarTy->isIntegerTy()) { + // Check if the used TEs supposed to be resized and choose the best + // candidates. 
+ BitWidth = DL->getTypeStoreSize(ScalarTy); + if (TEUseEI.UserTE->getOpcode() != Instruction::Select || + TEUseEI.EdgeIdx != 0) { + auto UserIt = MinBWs.find(TEUseEI.UserTE); + if (UserIt != MinBWs.end()) + BitWidth = UserIt->second.second; + } + } + auto CheckBitwidth = [&](const TreeEntry &TE) { + Type *ScalarTy = TE.Scalars.front()->getType(); + if (!ScalarTy->isIntegerTy()) + return true; + unsigned TEBitWidth = DL->getTypeStoreSize(ScalarTy); + auto UserIt = MinBWs.find(TEUseEI.UserTE); + if (UserIt != MinBWs.end()) + TEBitWidth = UserIt->second.second; + return BitWidth == TEBitWidth; + }; SmallVector> UsedTEs; DenseMap UsedValuesEntry; for (Value *V : VL) { @@ -10026,6 +10138,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; } + if (!CheckBitwidth(*TEPtr)) + continue; // Check if the user node of the TE comes after user node of TEPtr, // otherwise TEPtr depends on TE. if ((TEInsertBlock != InsertPt->getParent() || @@ -10042,8 +10156,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; VTE = *It->getSecond().begin(); // Iterate through all vectorized nodes. - auto *MIt = find_if(It->getSecond(), [](const TreeEntry *MTE) { - return MTE->State == TreeEntry::Vectorize; + auto *MIt = find_if(It->getSecond(), [&](const TreeEntry *MTE) { + return MTE->State == TreeEntry::Vectorize && CheckBitwidth(*MTE); }); if (MIt == It->getSecond().end()) continue; @@ -10053,10 +10167,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( Instruction &LastBundleInst = getLastInstructionInBundle(VTE); if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst)) continue; - auto It = MinBWs.find(VTE); - // If vectorize node is demoted - do not match. 
- if (It != MinBWs.end() && - It->second.first != DL->getTypeSizeInBits(V->getType())) + if (!CheckBitwidth(*VTE)) continue; VToTEs.insert(VTE); } @@ -12929,7 +13040,21 @@ Value *BoUpSLP::vectorizeTree( Builder.ClearInsertionPoint(); InstrElementSize.clear(); - return VectorizableTree[0]->VectorizedValue; + const TreeEntry &RootTE = *VectorizableTree.front().get(); + Value *Vec = RootTE.VectorizedValue; + if (auto It = MinBWs.find(&RootTE); ReductionBitWidth != 0 && + It != MinBWs.end() && + ReductionBitWidth != It->second.first) { + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(ReductionRoot->getParent(), + ReductionRoot->getIterator()); + Vec = Builder.CreateIntCast( + Vec, + VectorType::get(Builder.getIntNTy(ReductionBitWidth), + cast(Vec->getType())->getElementCount()), + It->second.second); + } + return Vec; } void BoUpSLP::optimizeGatherSequence() { @@ -13749,23 +13874,42 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { // smaller type with a truncation. We collect the values that will be demoted // in ToDemote and additional roots that require investigating in Roots. bool BoUpSLP::collectValuesToDemote( - Value *V, SmallVectorImpl &ToDemote, + Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, + SmallVectorImpl &ToDemote, DenseMap> &DemotedConsts, - SmallVectorImpl &Roots, DenseSet &Visited) const { + DenseSet &Visited, unsigned &MaxDepthLevel, + bool &IsProfitableToDemote) const { // We can always demote constants. - if (isa(V)) + if (isa(V)) { + MaxDepthLevel = 1; return true; + } // If the value is not a vectorized instruction in the expression and not used // by the insertelement instruction and not used in multiple vector nodes, it // cannot be demoted. + // TODO: improve handling of gathered values and others. 
auto *I = dyn_cast(V); - if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) || - !Visited.insert(I).second || all_of(I->users(), [&](User *U) { + if (!I || !Visited.insert(I).second || !getTreeEntry(I) || + MultiNodeScalars.contains(I) || all_of(I->users(), [&](User *U) { return isa(U) && !getTreeEntry(U); })) return false; + auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { + if (MultiNodeScalars.contains(V)) + return false; + uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) + return true; + auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT); + unsigned BitWidth1 = OrigBitWidth - NumSignBits; + if (!isKnownNonNegative(V, SimplifyQuery(*DL))) + ++BitWidth1; + BitWidth = std::max(BitWidth, BitWidth1); + return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); + }; unsigned Start = 0; unsigned End = I->getNumOperands(); switch (I->getOpcode()) { @@ -13773,12 +13917,14 @@ bool BoUpSLP::collectValuesToDemote( // We can always demote truncations and extensions. Since truncations can // seed additional demotion, we save the truncated value. 
case Instruction::Trunc: - Roots.push_back(I->getOperand(0)); + MaxDepthLevel = 1; + if (IsProfitableToDemoteRoot) + IsProfitableToDemote = true; break; case Instruction::ZExt: case Instruction::SExt: - if (isa(I->getOperand(0))) - return false; + MaxDepthLevel = 1; + IsProfitableToDemote = true; break; // We can demote certain binary operations if we can demote both of their @@ -13788,23 +13934,32 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::Mul: case Instruction::And: case Instruction::Or: - case Instruction::Xor: - if (!collectValuesToDemote(I->getOperand(0), ToDemote, DemotedConsts, Roots, - Visited) || - !collectValuesToDemote(I->getOperand(1), ToDemote, DemotedConsts, Roots, - Visited)) + case Instruction::Xor: { + unsigned Level1, Level2; + if (!collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot, + BitWidth, ToDemote, DemotedConsts, Visited, + Level1, IsProfitableToDemote) || + !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot, + BitWidth, ToDemote, DemotedConsts, Visited, + Level2, IsProfitableToDemote)) return false; + MaxDepthLevel = std::max(Level1, Level2); break; + } // We can demote selects if we can demote their true and false values. 
case Instruction::Select: { Start = 1; + unsigned Level1, Level2; SelectInst *SI = cast(I); - if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, DemotedConsts, - Roots, Visited) || - !collectValuesToDemote(SI->getFalseValue(), ToDemote, DemotedConsts, - Roots, Visited)) + if (!collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot, + BitWidth, ToDemote, DemotedConsts, Visited, + Level1, IsProfitableToDemote) || + !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot, + BitWidth, ToDemote, DemotedConsts, Visited, + Level2, IsProfitableToDemote)) return false; + MaxDepthLevel = std::max(Level1, Level2); break; } @@ -13813,171 +13968,262 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::PHI: { PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!collectValuesToDemote(IncValue, ToDemote, DemotedConsts, Roots, - Visited)) + if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth, + ToDemote, DemotedConsts, Visited, + MaxDepthLevel, IsProfitableToDemote)) return false; break; } // Otherwise, conservatively give up. default: - return false; + return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth); } + ++MaxDepthLevel; // Gather demoted constant operands. for (unsigned Idx : seq(Start, End)) if (isa(I->getOperand(Idx))) DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); // Record the value that we can demote. ToDemote.push_back(V); - return true; + return IsProfitableToDemote; } void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. 
- auto &TreeRoot = VectorizableTree[0]->Scalars; - auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); - if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather) + bool IsStoreOrInsertElt = + VectorizableTree.front()->getOpcode() == Instruction::Store || + VectorizableTree.front()->getOpcode() == Instruction::InsertElement; + if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && + (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || + CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; + unsigned NodeIdx = 0; + if (IsStoreOrInsertElt && + VectorizableTree.front()->State != TreeEntry::NeedToGather) + NodeIdx = 1; + // Ensure the roots of the vectorizable tree don't form a cycle. - if (!VectorizableTree.front()->UserTreeIndices.empty()) + if (VectorizableTree[NodeIdx]->State == TreeEntry::NeedToGather || + (NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) || + (NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices, + [NodeIdx](const EdgeInfo &EI) { + return EI.UserTE->Idx > + static_cast(NodeIdx); + }))) return; - // Conservatively determine if we can actually truncate the roots of the - // expression. Collect the values that can be demoted in ToDemote and - // additional roots that require investigating in Roots. - SmallVector ToDemote; - DenseMap> DemotedConsts; - SmallVector Roots; - for (auto *Root : TreeRoot) { - DenseSet Visited; - if (!collectValuesToDemote(Root, ToDemote, DemotedConsts, Roots, Visited)) - return; - } - - // The maximum bit width required to represent all the values that can be - // demoted without loss of precision. It would be safe to truncate the roots - // of the expression to this width. - auto MaxBitWidth = 1u; - - // We first check if all the bits of the roots are demanded. If they're not, - // we can truncate the roots to this narrower type. 
- for (auto *Root : TreeRoot) { - auto Mask = DB->getDemandedBits(cast(Root)); - MaxBitWidth = std::max(Mask.getBitWidth() - Mask.countl_zero(), - MaxBitWidth); - } - - // True if the roots can be zero-extended back to their original type, rather - // than sign-extended. We know that if the leading bits are not demanded, we - // can safely zero-extend. So we initialize IsKnownPositive to True. - bool IsKnownPositive = true; - - // If all the bits of the roots are demanded, we can try a little harder to - // compute a narrower type. This can happen, for example, if the roots are - // getelementptr indices. InstCombine promotes these indices to the pointer - // width. Thus, all their bits are technically demanded even though the - // address computation might be vectorized in a smaller type. - // - // We start by looking at each entry that can be demoted. We compute the - // maximum bit width required to store the scalar by using ValueTracking to - // compute the number of high-order bits we can truncate. - if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) && - all_of(TreeRoot, [](Value *V) { - return all_of(V->users(), - [](User *U) { return isa(U); }); - })) { - MaxBitWidth = 8u; + // The first value node for store/insertelement is sext/zext/trunc? Skip it, + // resize to the final type. + bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt; + if (NodeIdx != 0 && + VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize && + (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt || + VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt || + VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) { + assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph."); + ++NodeIdx; + IsProfitableToDemoteRoot = true; + } + + // Analyzed in reduction already and not profitable - exit. 
+ if (AnalyzedMinBWVals.contains(VectorizableTree[NodeIdx]->Scalars.front())) + return; + SmallVector ToDemote; + DenseMap> DemotedConsts; + auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, + bool IsTopRoot, bool IsProfitableToDemoteRoot, + unsigned Opcode, unsigned Limit) { + ToDemote.clear(); + auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); + if (!TreeRootIT || !Opcode) + return 0u; + + if (AnalyzedMinBWVals.contains(TreeRoot.front())) + return 0u; + + unsigned NumParts = TTI->getNumberOfParts( + FixedVectorType::get(TreeRoot.front()->getType(), VF)); + + // The maximum bit width required to represent all the values that can be + // demoted without loss of precision. It would be safe to truncate the roots + // of the expression to this width. + unsigned MaxBitWidth = 1u; + + // True if the roots can be zero-extended back to their original type, + // rather than sign-extended. We know that if the leading bits are not + // demanded, we can safely zero-extend. So we initialize IsKnownPositive to + // True. // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) { + bool IsKnownPositive = all_of(TreeRoot, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); - // Determine the maximum number of bits required to store the scalar - // values. - for (auto *Scalar : ToDemote) { - auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT); - auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); - MaxBitWidth = std::max(NumTypeBits - NumSignBits, MaxBitWidth); - } - - // If we can't prove that the sign bit is zero, we must add one to the - // maximum bit width to account for the unknown sign bit. This preserves - // the existing sign bit so we can safely sign-extend the root back to the - // original type. 
Otherwise, if we know the sign bit is zero, we will - // zero-extend the root instead. - // - // FIXME: This is somewhat suboptimal, as there will be cases where adding - // one to the maximum bit width will yield a larger-than-necessary - // type. In general, we need to add an extra bit only if we can't - // prove that the upper bit of the original type is equal to the - // upper bit of the proposed smaller type. If these two bits are the - // same (either zero or one) we know that sign-extending from the - // smaller type will result in the same value. Here, since we can't - // yet prove this, we are just making the proposed smaller type - // larger to ensure correctness. - if (!IsKnownPositive) - ++MaxBitWidth; - } - - // Round MaxBitWidth up to the next power-of-two. - MaxBitWidth = llvm::bit_ceil(MaxBitWidth); - - // If the maximum bit width we compute is less than the with of the roots' - // type, we can proceed with the narrowing. Otherwise, do nothing. - if (MaxBitWidth >= TreeRootIT->getBitWidth()) - return; + // We first check if all the bits of the roots are demanded. If they're not, + // we can truncate the roots to this narrower type. + for (auto *Root : TreeRoot) { + unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); + TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()); + unsigned BitWidth1 = NumTypeBits - NumSignBits; + // If we can't prove that the sign bit is zero, we must add one to the + // maximum bit width to account for the unknown sign bit. This preserves + // the existing sign bit so we can safely sign-extend the root back to the + // original type. Otherwise, if we know the sign bit is zero, we will + // zero-extend the root instead. + // + // FIXME: This is somewhat suboptimal, as there will be cases where adding + // one to the maximum bit width will yield a larger-than-necessary + // type. 
In general, we need to add an extra bit only if we can't + // prove that the upper bit of the original type is equal to the + // upper bit of the proposed smaller type. If these two bits are + // the same (either zero or one) we know that sign-extending from + // the smaller type will result in the same value. Here, since we + // can't yet prove this, we are just making the proposed smaller + // type larger to ensure correctness. + if (!IsKnownPositive) + ++BitWidth1; + + APInt Mask = DB->getDemandedBits(cast(Root)); + unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); + MaxBitWidth = + std::max(std::min(BitWidth1, BitWidth2), MaxBitWidth); + } + + if (MaxBitWidth < 8 && MaxBitWidth > 1) + MaxBitWidth = 8; + + // If the original type is large, but reduced type does not improve the reg + // use - ignore it. + if (NumParts > 1 && + NumParts == + TTI->getNumberOfParts(FixedVectorType::get( + IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF))) + return 0u; + + bool IsProfitableToDemote = Opcode == Instruction::Trunc || + Opcode == Instruction::SExt || + Opcode == Instruction::ZExt || NumParts > 1; + // Conservatively determine if we can actually truncate the roots of the + // expression. Collect the values that can be demoted in ToDemote and + // additional roots that require investigating in Roots. 
+ for (auto *Root : TreeRoot) { + DenseSet Visited; + unsigned MaxDepthLevel; + bool NeedToDemote = IsProfitableToDemote; + + if (!collectValuesToDemote(Root, IsProfitableToDemoteRoot, MaxBitWidth, + ToDemote, DemotedConsts, Visited, + MaxDepthLevel, NeedToDemote) || + (MaxDepthLevel <= Limit && + !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && + (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || + DL->getTypeSizeInBits(Root->getType()) / + DL->getTypeSizeInBits( + cast(Root)->getOperand(0)->getType()) > + 2)) || + (Opcode == Instruction::Trunc && + (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || + DL->getTypeSizeInBits( + cast(Root)->getOperand(0)->getType()) / + DL->getTypeSizeInBits(Root->getType()) > + 2))))) + return 0u; + } + // Round MaxBitWidth up to the next power-of-two. + MaxBitWidth = bit_ceil(MaxBitWidth); + + return MaxBitWidth; + }; // If we can truncate the root, we must collect additional values that might // be demoted as a result. That is, those seeded by truncations we will // modify. - while (!Roots.empty()) { - DenseSet Visited; - collectValuesToDemote(Roots.pop_back_val(), ToDemote, DemotedConsts, Roots, - Visited); - } - - // Check that all users are marked for demotion. - DenseSet Demoted(ToDemote.begin(), ToDemote.end()); - DenseSet Visited; - for (Value *V: ToDemote) { - const TreeEntry *TE = getTreeEntry(V); - assert(TE && "Expected vectorized scalar."); - if (!Visited.insert(TE).second) - continue; - if (!all_of(TE->UserTreeIndices, [&](const EdgeInfo &EI) { - return all_of(EI.UserTE->Scalars, - [&](Value *V) { return Demoted.contains(V); }); - })) - return; - } - // Finally, map the values we can demote to the maximum bit with we computed. - for (auto *Scalar : ToDemote) { - auto *TE = getTreeEntry(Scalar); - assert(TE && "Expected vectorized scalar."); - if (MinBWs.contains(TE)) + // Add reduction ops sizes, if any. 
+ if (UserIgnoreList && + isa(VectorizableTree.front()->Scalars.front()->getType())) { + for (Value *V : *UserIgnoreList) { + auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT); + auto NumTypeBits = DL->getTypeSizeInBits(V->getType()); + unsigned BitWidth1 = NumTypeBits - NumSignBits; + if (!isKnownNonNegative(V, SimplifyQuery(*DL))) + ++BitWidth1; + auto Mask = DB->getDemandedBits(cast(V)); + unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); + ReductionBitWidth = + std::max(std::min(BitWidth1, BitWidth2), ReductionBitWidth); + } + if (ReductionBitWidth < 8 && ReductionBitWidth > 1) + ReductionBitWidth = 8; + + ReductionBitWidth = bit_ceil(ReductionBitWidth); + } + bool IsTopRoot = NodeIdx == 0; + while (NodeIdx < VectorizableTree.size() && + VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize && + VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) + ++NodeIdx; + while (NodeIdx < VectorizableTree.size()) { + ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; + unsigned Limit = 2; + unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode(); + if (IsTopRoot && + ReductionBitWidth == + DL->getTypeSizeInBits( + VectorizableTree.front()->Scalars.front()->getType())) + Limit = 3; + unsigned MaxBitWidth = ComputeMaxBitWidth( + TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, + IsProfitableToDemoteRoot, Opcode, Limit); + IsTopRoot = false; + IsProfitableToDemoteRoot = true; + + if (TruncNodes.empty()) { + NodeIdx = VectorizableTree.size(); + } else { + NodeIdx = *TruncNodes.begin() + 1; + TruncNodes.erase(TruncNodes.begin()); + } + + // If the maximum bit width we compute is less than the with of the roots' + // type, we can proceed with the narrowing. Otherwise, do nothing. 
+ if (MaxBitWidth == 0 || + MaxBitWidth >= + cast(TreeRoot.front()->getType())->getBitWidth()) { + if (UserIgnoreList) + AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end()); continue; - bool IsSigned = any_of(TE->Scalars, [&](Value *R) { - KnownBits Known = computeKnownBits(R, *DL); - return !Known.isNonNegative(); - }); - MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); - const auto *I = cast(Scalar); - auto DCIt = DemotedConsts.find(I); - if (DCIt != DemotedConsts.end()) { - for (unsigned Idx : DCIt->getSecond()) { - // Check that all instructions operands are demoted. - if (all_of(TE->Scalars, [&](Value *V) { - auto SIt = DemotedConsts.find(cast(V)); - return SIt != DemotedConsts.end() && - is_contained(SIt->getSecond(), Idx); - })) { + } + + // Finally, map the values we can demote to the maximum bit with we + // computed. + for (Value *Scalar : ToDemote) { + TreeEntry *TE = getTreeEntry(Scalar); + assert(TE && "Expected vectorized scalar."); + if (MinBWs.contains(TE)) + continue; + bool IsSigned = any_of(TE->Scalars, [&](Value *R) { + return !isKnownNonNegative(R, SimplifyQuery(*DL)); + }); + MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); + const auto *I = cast(Scalar); + auto DCIt = DemotedConsts.find(I); + if (DCIt != DemotedConsts.end()) { + for (unsigned Idx : DCIt->getSecond()) { + // Check that all instructions operands are demoted. 
const TreeEntry *CTE = getOperandEntry(TE, Idx); - MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); + if (all_of(TE->Scalars, + [&](Value *V) { + auto SIt = DemotedConsts.find(cast(V)); + return SIt != DemotedConsts.end() && + is_contained(SIt->getSecond(), Idx); + }) || + all_of(CTE->Scalars, Constant::classof)) + MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); } } } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll index cef791633655a8..5e3fd156666f5f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -17,12 +17,13 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SUB0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]] ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4 ; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3 ; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll 
b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll index 47485e514ec2fc..1cce52060c479f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ;test_i16_extend NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s -; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll index d67fdc1cd6aa0e..a7a7f642ced538 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll @@ -28,21 +28,11 @@ entry: define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64 -; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 -; 
CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1 -; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64 -; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]] -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 -; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1 -; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64 -; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] -; CHECK-NEXT: ret i64 [[ADD_3]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64 +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %ld0 = load i8, ptr %ptr diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 000e7a56df3778..500f10659f04cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -802,9 +802,10 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]]) -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64 +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %ld0 = load i8, ptr %ptr diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll index 4565d4928ba4ad..05511f843a68fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -15,11 +15,12 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 -; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP12]], 1 ; CHECK-NEXT: ret { i64, i64 } [[T17]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index a0af8e36b36c79..5ee80160765387 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-3 < %s | FileCheck %s +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s define void @t(i64 %v) { ; CHECK-LABEL: define void @t( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll index 6e512fcbb73924..6051638562b59b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll @@ -6,18 +6,17 @@ define void @test(i8 %0) { ; CHECK-SAME: i8 [[TMP0:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> , i8 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 1 ; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SHR]] to i8 ; CHECK-NEXT: store i8 [[CONV9]], ptr null, align 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 2c834616becc0d..4acd63078b82ef 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -6,15 +6,20 @@ define void @test(i64 %d.promoted.i) { ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> , i64 [[AND_1_I_1]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9 -; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0 -; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I_1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0 +; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll index 
651631de2c35ad..a316415dcc6b52 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -17,12 +17,15 @@ target triple = "x86_64-unknown-linux-gnu" define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-LABEL: @PR31243_zext( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 -; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 -; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64 -; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] -; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 +; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 +; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 +; SSE-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] ; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1 ; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] @@ -73,12 +76,15 @@ entry: define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-LABEL: @PR31243_sext( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 -; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 -; SSE-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64 -; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] -; SSE-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64 -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; SSE-NEXT: 
[[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 +; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 +; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 +; SSE-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64 +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] ; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1 ; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] @@ -89,13 +95,12 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 ; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16> -; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0 -; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64 -; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]] -; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1 -; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64 -; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 +; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 +; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] +; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 +; AVX-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64 +; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] ; AVX-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; AVX-NEXT: 
[[T7:%.*]] = load i8, ptr [[T5]], align 1 ; AVX-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll index 88f75c37846efc..3cc32c1fc7b28e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -15,8 +15,8 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -52,8 +52,8 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -89,8 +89,8 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: 
[[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -127,8 +127,8 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -165,8 +165,8 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -202,8 +202,8 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 78c6d9516a3dec..b7237cbb02bb32 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -11,26 +11,26 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[SHUFFLE1]], +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], ; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5 ; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6 ; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = trunc 
<16 x i32> [[TMP12]] to <16 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i8> [[TMP13]], -; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr undef, align 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_4_I_I]], i32 5 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_5_I_I]], i32 6 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_6_I_I]], i32 7 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[TMP13]] to <16 x i8> +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i8> [[TMP14]], +; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr undef, align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll index 5d22b5a4873be3..1d1fcec2a7aeba 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll @@ -7,12 +7,10 @@ define i1 @test(i1 %cmp5.not.31) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> , i1 [[CMP5_NOT_31]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> 
[[TMP1]] to <4 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0 -; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 0 +; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[CMP_NOT_I_I]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll index c1dd90d0e9a7bb..2f6868d8dfd628 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll @@ -8,17 +8,18 @@ ; YAML-NEXT: Function: stores ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-3' +; YAML-NEXT: - Cost: '-7' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) { ; CHECK-LABEL: @stores( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] -; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr [[OUT:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> 
[[TMP5]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %load.1 = load i8, ptr %in, align 1 @@ -63,17 +64,18 @@ define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) { ; YAML-NEXT: Function: insertelems ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-5' +; YAML-NEXT: - Cost: '-9' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define <4 x i64> @insertelems(ptr noalias %in, ptr noalias %inn) { ; CHECK-LABEL: @insertelems( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] -; CHECK-NEXT: ret <4 x i64> [[TMP5]] +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[TMP6]] ; %load.1 = load i8, ptr %in, align 1 %gep.1 = getelementptr inbounds i8, ptr %in, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll b/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll index 061fbdb45a13bc..ff6f0bdd3db8f2 100644 --- a/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll @@ -10,8 +10,8 @@ define i32 @alt_cmp(i16 %call46) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i16> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16> -; CHECK-NEXT: [[TMP6:%.*]] 
= call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i16 ; CHECK-NEXT: [[OP_RDX:%.*]] = or i16 [[TMP6]], 0 ; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[OP_RDX]] to i32 ; CHECK-NEXT: ret i32 [[EXT]] From a213df5d3895f323ef0d2d9affc1020414576caa Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 09:45:14 -0600 Subject: [PATCH 045/158] [LinkerWrapper] Use the correct empty file on Windows (#84322) Summary: The clang-offload-bundler uses an empty file to control the bundles made for embedding. Previously this still used `/dev/null` by mistake even on Windows. --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 576e8f2cd7f8fd..0a783db8962ba7 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -410,7 +410,11 @@ fatbinary(ArrayRef> InputFiles, Targets.push_back(Saver.save("hipv4-amdgcn-amd-amdhsa--" + Arch)); CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); +#ifdef _WIN32 + CmdArgs.push_back("-input=NUL"); +#else CmdArgs.push_back("-input=/dev/null"); +#endif for (const auto &[File, Arch] : InputFiles) CmdArgs.push_back(Saver.save("-input=" + File)); From d6b3be375ffed14fefc93c2031cd56e680afd0c1 Mon Sep 17 00:00:00 2001 From: sylvain-audi <62035306+sylvain-audi@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:54:41 -0500 Subject: [PATCH 046/158] [NFC][Asan] Prepare AddressSanitizer to detect inserted runtime calls (#84223) This is in preparation for an upcoming commit that will add "funclet" OpBundle to the inserted runtime calls where the function's EH personality requires it. 
See PR https://github.com/llvm/llvm-project/pull/82533 --- .../Instrumentation/AddressSanitizer.cpp | 184 +++++++++++------- 1 file changed, 111 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f22f53b8cd8fc6..c95a50a033b1b2 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -642,6 +642,23 @@ static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { } namespace { +/// Helper RAII class to post-process inserted asan runtime calls during a +/// pass on a single Function. This is a no-op implementation, for a first NFC +/// commit. Coming up: detect and add "funclet" opBundle to function calls that +/// need them. +class RuntimeCallInserter { + Function *OwnerFn = nullptr; + +public: + RuntimeCallInserter(Function &Fn) : OwnerFn(&Fn) {} + + CallInst *createRuntimeCall(IRBuilder<> &IRB, FunctionCallee Callee, + ArrayRef Args = {}, + const Twine &Name = "") { + assert(IRB.GetInsertBlock()->getParent() == OwnerFn); + return IRB.CreateCall(Callee, Args, Name, nullptr); + } +}; /// AddressSanitizer: instrument the code in module to find memory bugs. 
struct AddressSanitizer { @@ -691,12 +708,14 @@ struct AddressSanitizer { void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, InterestingMemoryOperand &O, bool UseCalls, - const DataLayout &DL); - void instrumentPointerComparisonOrSubtraction(Instruction *I); + const DataLayout &DL, RuntimeCallInserter &RTCI); + void instrumentPointerComparisonOrSubtraction(Instruction *I, + RuntimeCallInserter &RTCI); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, MaybeAlign Alignment, uint32_t TypeStoreSize, bool IsWrite, - Value *SizeArgument, bool UseCalls, uint32_t Exp); + Value *SizeArgument, bool UseCalls, uint32_t Exp, + RuntimeCallInserter &RTCI); Instruction *instrumentAMDGPUAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeStoreSize, bool IsWrite, @@ -707,20 +726,22 @@ struct AddressSanitizer { Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp); + uint32_t Exp, + RuntimeCallInserter &RTCI); void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Value *EVL, Value *Stride, Instruction *I, Value *Addr, MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp); + uint32_t Exp, RuntimeCallInserter &RTCI); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeStoreSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, - Value *SizeArgument, uint32_t Exp); - void instrumentMemIntrinsic(MemIntrinsic *MI); + Value *SizeArgument, uint32_t Exp, + RuntimeCallInserter &RTCI); + void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const 
TargetLibraryInfo *TLI); @@ -912,6 +933,7 @@ class ModuleAddressSanitizer { struct FunctionStackPoisoner : public InstVisitor { Function &F; AddressSanitizer &ASan; + RuntimeCallInserter &RTCI; DIBuilder DIB; LLVMContext *C; Type *IntptrTy; @@ -948,10 +970,12 @@ struct FunctionStackPoisoner : public InstVisitor { bool HasReturnsTwiceCall = false; bool PoisonStack; - FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) - : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), - C(ASan.C), IntptrTy(ASan.IntptrTy), - IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), + FunctionStackPoisoner(Function &F, AddressSanitizer &ASan, + RuntimeCallInserter &RTCI) + : F(F), ASan(ASan), RTCI(RTCI), + DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), + IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), + Mapping(ASan.Mapping), PoisonStack(ClStack && !Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {} @@ -1034,8 +1058,8 @@ struct FunctionStackPoisoner : public InstVisitor { DynamicAreaOffset); } - IRB.CreateCall( - AsanAllocasUnpoisonFunc, + RTCI.createRuntimeCall( + IRB, AsanAllocasUnpoisonFunc, {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr}); } @@ -1251,16 +1275,18 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { } // Instrument memset/memmove/memcpy -void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { +void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI, + RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(MI); if (isa(MI)) { - IRB.CreateCall(isa(MI) ? AsanMemmove : AsanMemcpy, - {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), - IRB.CreateAddrSpaceCast(MI->getOperand(1), PtrTy), - IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); + RTCI.createRuntimeCall( + IRB, isa(MI) ? 
AsanMemmove : AsanMemcpy, + {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), + IRB.CreateAddrSpaceCast(MI->getOperand(1), PtrTy), + IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); } else if (isa(MI)) { - IRB.CreateCall( - AsanMemset, + RTCI.createRuntimeCall( + IRB, AsanMemset, {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); @@ -1498,7 +1524,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { } void AddressSanitizer::instrumentPointerComparisonOrSubtraction( - Instruction *I) { + Instruction *I, RuntimeCallInserter &RTCI) { IRBuilder<> IRB(I); FunctionCallee F = isa(I) ? AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; @@ -1506,7 +1532,7 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction( if (i->getType()->isPointerTy()) i = IRB.CreatePointerCast(i, IntptrTy); } - IRB.CreateCall(F, Param); + RTCI.createRuntimeCall(IRB, F, Param); } static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, @@ -1514,7 +1540,7 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, MaybeAlign Alignment, unsigned Granularity, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp) { + uint32_t Exp, RuntimeCallInserter &RTCI) { // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check // if the data is properly aligned. 
if (!TypeStoreSize.isScalable()) { @@ -1529,18 +1555,19 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, *Alignment >= FixedSize / 8) return Pass->instrumentAddress(I, InsertBefore, Addr, Alignment, FixedSize, IsWrite, nullptr, UseCalls, - Exp); + Exp, RTCI); } } Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeStoreSize, - IsWrite, nullptr, UseCalls, Exp); + IsWrite, nullptr, UseCalls, Exp, RTCI); } void AddressSanitizer::instrumentMaskedLoadOrStore( AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Value *EVL, Value *Stride, Instruction *I, Value *Addr, MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite, - Value *SizeArgument, bool UseCalls, uint32_t Exp) { + Value *SizeArgument, bool UseCalls, uint32_t Exp, + RuntimeCallInserter &RTCI) { auto *VTy = cast(OpType); TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); auto Zero = ConstantInt::get(IntptrTy, 0); @@ -1595,15 +1622,16 @@ void AddressSanitizer::instrumentMaskedLoadOrStore( } else { InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index}); } - doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), - InstrumentedAddress, Alignment, Granularity, - ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp); + doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, + Alignment, Granularity, ElemTypeSize, IsWrite, + SizeArgument, UseCalls, Exp, RTCI); }); } void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, InterestingMemoryOperand &O, bool UseCalls, - const DataLayout &DL) { + const DataLayout &DL, + RuntimeCallInserter &RTCI) { Value *Addr = O.getPtr(); // Optimization experiments. 
@@ -1649,11 +1677,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL, O.MaybeStride, O.getInsn(), Addr, O.Alignment, Granularity, O.OpType, O.IsWrite, nullptr, - UseCalls, Exp); + UseCalls, Exp, RTCI); } else { doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, - Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls, - Exp); + Granularity, O.TypeStoreSize, O.IsWrite, nullptr, + UseCalls, Exp, RTCI); } } @@ -1661,24 +1689,25 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, - uint32_t Exp) { + uint32_t Exp, + RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(InsertBefore); Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); CallInst *Call = nullptr; if (SizeArgument) { if (Exp == 0) - Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0], - {Addr, SizeArgument}); + Call = RTCI.createRuntimeCall(IRB, AsanErrorCallbackSized[IsWrite][0], + {Addr, SizeArgument}); else - Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1], - {Addr, SizeArgument, ExpVal}); + Call = RTCI.createRuntimeCall(IRB, AsanErrorCallbackSized[IsWrite][1], + {Addr, SizeArgument, ExpVal}); } else { if (Exp == 0) - Call = - IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); + Call = RTCI.createRuntimeCall( + IRB, AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); else - Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex], - {Addr, ExpVal}); + Call = RTCI.createRuntimeCall( + IRB, AsanErrorCallback[IsWrite][1][AccessSizeIndex], {Addr, ExpVal}); } Call->setCannotMerge(); @@ -1754,7 +1783,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, MaybeAlign Alignment, uint32_t TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp) { + uint32_t Exp, + 
RuntimeCallInserter &RTCI) { if (TargetTriple.isAMDGPU()) { InsertBefore = instrumentAMDGPUAddress(OrigIns, InsertBefore, Addr, TypeStoreSize, IsWrite, SizeArgument); @@ -1779,11 +1809,12 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], - AddrLong); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], AddrLong); else - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], - {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], + {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); return; } @@ -1830,8 +1861,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } - Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, - AccessSizeIndex, SizeArgument, Exp); + Instruction *Crash = generateCrashCode( + CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument, Exp, RTCI); if (OrigIns->getDebugLoc()) Crash->setDebugLoc(OrigIns->getDebugLoc()); } @@ -1841,8 +1872,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able // to report the actual access size. 
void AddressSanitizer::instrumentUnusualSizeOrAlignment( - Instruction *I, Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize, - bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { + Instruction *I, Instruction *InsertBefore, Value *Addr, + TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, + uint32_t Exp, RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(InsertBefore); Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize); Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3)); @@ -1850,19 +1882,21 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], - {AddrLong, Size}); + RTCI.createRuntimeCall(IRB, AsanMemoryAccessCallbackSized[IsWrite][0], + {AddrLong, Size}); else - IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1], - {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallbackSized[IsWrite][1], + {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); } else { Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1)); Value *LastByte = IRB.CreateIntToPtr( IRB.CreateAdd(AddrLong, SizeMinusOne), Addr->getType()); - instrumentAddress(I, InsertBefore, Addr, {}, 8, IsWrite, Size, false, Exp); + instrumentAddress(I, InsertBefore, Addr, {}, 8, IsWrite, Size, false, Exp, + RTCI); instrumentAddress(I, InsertBefore, LastByte, {}, 8, IsWrite, Size, false, - Exp); + Exp, RTCI); } } @@ -2881,6 +2915,8 @@ bool AddressSanitizer::instrumentFunction(Function &F, FunctionStateRAII CleanupObj(this); + RuntimeCallInserter RTCI(F); + FunctionModified |= maybeInsertDynamicShadowAtFunctionEntry(F); // We can't instrument allocas used with llvm.localescape. 
Only static allocas @@ -2963,27 +2999,27 @@ bool AddressSanitizer::instrumentFunction(Function &F, for (auto &Operand : OperandsToInstrument) { if (!suppressInstrumentationSiteForDebug(NumInstrumented)) instrumentMop(ObjSizeVis, Operand, UseCalls, - F.getParent()->getDataLayout()); + F.getParent()->getDataLayout(), RTCI); FunctionModified = true; } for (auto *Inst : IntrinToInstrument) { if (!suppressInstrumentationSiteForDebug(NumInstrumented)) - instrumentMemIntrinsic(Inst); + instrumentMemIntrinsic(Inst, RTCI); FunctionModified = true; } - FunctionStackPoisoner FSP(F, *this); + FunctionStackPoisoner FSP(F, *this, RTCI); bool ChangedStack = FSP.runOnFunction(); // We must unpoison the stack before NoReturn calls (throw, _exit, etc). // See e.g. https://github.com/google/sanitizers/issues/37 for (auto *CI : NoReturnCalls) { IRBuilder<> IRB(CI); - IRB.CreateCall(AsanHandleNoReturnFunc, {}); + RTCI.createRuntimeCall(IRB, AsanHandleNoReturnFunc, {}); } for (auto *Inst : PointerComparisonsOrSubtracts) { - instrumentPointerComparisonOrSubtraction(Inst); + instrumentPointerComparisonOrSubtraction(Inst, RTCI); FunctionModified = true; } @@ -3128,9 +3164,10 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, if (j - i >= ASan.MaxInlinePoisoningSize) { copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); - IRB.CreateCall(AsanSetShadowFunc[Val], - {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), - ConstantInt::get(IntptrTy, j - i)}); + RTCI.createRuntimeCall( + IRB, AsanSetShadowFunc[Val], + {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), + ConstantInt::get(IntptrTy, j - i)}); Done = j; } } @@ -3417,8 +3454,8 @@ void FunctionStackPoisoner::processStaticAllocas() { StackMallocIdx = StackMallocSizeClass(LocalStackSize); assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); Value *FakeStackValue = - IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx], - ConstantInt::get(IntptrTy, LocalStackSize)); + 
RTCI.createRuntimeCall(IRBIf, AsanStackMallocFunc[StackMallocIdx], + ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, ConstantInt::get(IntptrTy, 0)); @@ -3428,7 +3465,8 @@ void FunctionStackPoisoner::processStaticAllocas() { // void *LocalStackBase = (FakeStack) ? FakeStack : // alloca(LocalStackSize); StackMallocIdx = StackMallocSizeClass(LocalStackSize); - FakeStack = IRB.CreateCall(AsanStackMallocFunc[StackMallocIdx], + FakeStack = + RTCI.createRuntimeCall(IRB, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); } Value *NoFakeStack = @@ -3563,8 +3601,8 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getPtrTy())); } else { // For larger frames call __asan_stack_free_*. - IRBPoison.CreateCall( - AsanStackFreeFunc[StackMallocIdx], + RTCI.createRuntimeCall( + IRBPoison, AsanStackFreeFunc[StackMallocIdx], {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); } @@ -3585,8 +3623,8 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); - IRB.CreateCall( - DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, + RTCI.createRuntimeCall( + IRB, DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, {AddrArg, SizeArg}); } @@ -3647,7 +3685,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { ConstantInt::get(IntptrTy, Alignment.value())); // Insert __asan_alloca_poison call for new created alloca. - IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); + RTCI.createRuntimeCall(IRB, AsanAllocaPoisonFunc, {NewAddress, OldSize}); // Store the last alloca's address to DynamicAllocaLayout. We'll need this // for unpoisoning stuff. 
From 8aed911fe91bb6cbfb95789683dadf3e77ea713a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 7 Mar 2024 15:31:40 +0100 Subject: [PATCH 047/158] [clang][Interp] Implement complex comparisons --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 106 ++++++++++++++++++++++- clang/lib/AST/Interp/ByteCodeExprGen.h | 2 + clang/test/AST/Interp/complex.c | 8 +- clang/test/AST/Interp/complex.cpp | 47 ++++++++++ 4 files changed, 157 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index d887170cbc5d2d..8872579e12dc82 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -393,12 +393,16 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { if (BO->isLogicalOp()) return this->VisitLogicalBinOp(BO); - if (BO->getType()->isAnyComplexType()) - return this->VisitComplexBinOp(BO); - const Expr *LHS = BO->getLHS(); const Expr *RHS = BO->getRHS(); + if (BO->getType()->isAnyComplexType()) + return this->VisitComplexBinOp(BO); + if ((LHS->getType()->isAnyComplexType() || + RHS->getType()->isAnyComplexType()) && + BO->isComparisonOp()) + return this->emitComplexComparison(LHS, RHS, BO); + if (BO->isPtrMemOp()) return this->visit(RHS); @@ -3410,6 +3414,102 @@ bool ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { return true; } +template +bool ByteCodeExprGen::emitComplexComparison(const Expr *LHS, + const Expr *RHS, + const BinaryOperator *E) { + assert(E->isComparisonOp()); + assert(!Initializing); + assert(!DiscardResult); + + PrimType ElemT; + bool LHSIsComplex; + unsigned LHSOffset; + if (LHS->getType()->isAnyComplexType()) { + LHSIsComplex = true; + ElemT = classifyComplexElementType(LHS->getType()); + LHSOffset = allocateLocalPrimitive(LHS, PT_Ptr, /*IsConst=*/true, + /*IsExtended=*/false); + if (!this->visit(LHS)) + return false; + if (!this->emitSetLocal(PT_Ptr, LHSOffset, E)) + return false; + } else { + 
LHSIsComplex = false; + PrimType LHST = classifyPrim(LHS->getType()); + LHSOffset = this->allocateLocalPrimitive(LHS, LHST, true, false); + if (!this->visit(LHS)) + return false; + if (!this->emitSetLocal(LHST, LHSOffset, E)) + return false; + } + + bool RHSIsComplex; + unsigned RHSOffset; + if (RHS->getType()->isAnyComplexType()) { + RHSIsComplex = true; + ElemT = classifyComplexElementType(RHS->getType()); + RHSOffset = allocateLocalPrimitive(RHS, PT_Ptr, /*IsConst=*/true, + /*IsExtended=*/false); + if (!this->visit(RHS)) + return false; + if (!this->emitSetLocal(PT_Ptr, RHSOffset, E)) + return false; + } else { + RHSIsComplex = false; + PrimType RHST = classifyPrim(RHS->getType()); + RHSOffset = this->allocateLocalPrimitive(RHS, RHST, true, false); + if (!this->visit(RHS)) + return false; + if (!this->emitSetLocal(RHST, RHSOffset, E)) + return false; + } + + auto getElem = [&](unsigned LocalOffset, unsigned Index, + bool IsComplex) -> bool { + if (IsComplex) { + if (!this->emitGetLocal(PT_Ptr, LocalOffset, E)) + return false; + return this->emitArrayElemPop(ElemT, Index, E); + } + return this->emitGetLocal(ElemT, LocalOffset, E); + }; + + for (unsigned I = 0; I != 2; ++I) { + // Get both values. + if (!getElem(LHSOffset, I, LHSIsComplex)) + return false; + if (!getElem(RHSOffset, I, RHSIsComplex)) + return false; + // And compare them. + if (!this->emitEQ(ElemT, E)) + return false; + + if (!this->emitCastBoolUint8(E)) + return false; + } + + // We now have two bool values on the stack. Compare those. + if (!this->emitAddUint8(E)) + return false; + if (!this->emitConstUint8(2, E)) + return false; + + if (E->getOpcode() == BO_EQ) { + if (!this->emitEQUint8(E)) + return false; + } else if (E->getOpcode() == BO_NE) { + if (!this->emitNEUint8(E)) + return false; + } else + return false; + + // In C, this returns an int. 
+ if (PrimType ResT = classifyPrim(E->getType()); ResT != PT_Bool) + return this->emitCast(PT_Bool, ResT, E); + return true; +} + /// When calling this, we have a pointer of the local-to-destroy /// on the stack. /// Emit destruction of record types (or arrays of record types). diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index acbbcc3dc9619a..5977bb5e6ff25d 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -268,6 +268,8 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, bool emitComplexReal(const Expr *SubExpr); bool emitComplexBoolCast(const Expr *E); + bool emitComplexComparison(const Expr *LHS, const Expr *RHS, + const BinaryOperator *E); bool emitRecordDestruction(const Record *R); bool emitDestruction(const Descriptor *Desc); diff --git a/clang/test/AST/Interp/complex.c b/clang/test/AST/Interp/complex.c index b07d0241da12d6..c9c2efb5974531 100644 --- a/clang/test/AST/Interp/complex.c +++ b/clang/test/AST/Interp/complex.c @@ -1,9 +1,6 @@ // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both -Wno-unused-value %s // RUN: %clang_cc1 -verify=ref,both -Wno-unused-value %s -// expected-no-diagnostics -// ref-no-diagnostics - void blah() { __complex__ unsigned xx; __complex__ signed yy; @@ -12,3 +9,8 @@ void blah() { /// The following line calls into the constant interpreter. 
result = xx * yy; } + + +_Static_assert((0.0 + 0.0j) == (0.0 + 0.0j), ""); +_Static_assert((0.0 + 0.0j) != (0.0 + 0.0j), ""); // both-error {{static assertion}} \ + // both-note {{evaluates to}} diff --git a/clang/test/AST/Interp/complex.cpp b/clang/test/AST/Interp/complex.cpp index 8acce7b734d85a..6a42afc68d26c7 100644 --- a/clang/test/AST/Interp/complex.cpp +++ b/clang/test/AST/Interp/complex.cpp @@ -266,3 +266,50 @@ namespace Builtin { constexpr _Complex float C = __builtin_complex(10.0f, 20.0); // both-error {{arguments are of different types}} } + +namespace Cmp { + static_assert((0.0 + 0.0j) == (0.0 + 0.0j)); + static_assert((0.0 + 0.0j) != (0.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + + static_assert((0.0 + 0.0j) == 0.0); + static_assert(0.0 == (0.0 + 0.0j)); + static_assert(0.0 == 0.0j); + static_assert((0.0 + 1.0j) != 0.0); + static_assert(1.0 != (0.0 + 0.0j)); + static_assert(0.0 != 1.0j); + + // Walk around the complex plane stepping between angular differences and + // equality. 
+ static_assert((1.0 + 0.0j) == (0.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 + 0.0j) == (1.0 + 0.0j)); + static_assert((1.0 + 1.0j) == (1.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 + 1.0j) == (1.0 + 1.0j)); + static_assert((0.0 + 1.0j) == (1.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((0.0 + 1.0j) == (0.0 + 1.0j)); + static_assert((-1.0 + 1.0j) == (0.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 + 1.0j) == (-1.0 + 1.0j)); + static_assert((-1.0 + 0.0j) == (-1.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 + 0.0j) == (-1.0 + 0.0j)); + static_assert((-1.0 - 1.0j) == (-1.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 - 1.0j) == (-1.0 - 1.0j)); + static_assert((0.0 - 1.0j) == (-1.0 - 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((0.0 - 1.0j) == (0.0 - 1.0j)); + static_assert((1.0 - 1.0j) == (0.0 - 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 - 1.0j) == (1.0 - 1.0j)); + + /// Make sure these are rejected before reaching the constexpr interpreter. 
+ static_assert((0.0 + 0.0j) & (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) | (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) < (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) > (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) ^ (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} +} From 5d59fa90ce225814739d9b51ba37e1cca9204cad Mon Sep 17 00:00:00 2001 From: "Oleksandr \"Alex\" Zinenko" Date: Thu, 7 Mar 2024 17:14:08 +0100 Subject: [PATCH 048/158] Reapply "[mlir][py] better support for arith.constant construction" (#84142) Arithmetic constants for vector types can be constructed from objects implementing Python buffer protocol such as `array.array`. Note that until Python 3.12, there is no typing support for buffer protocol implementers, so the annotations use array explicitly. Reverts llvm/llvm-project#84103 --- mlir/python/mlir/dialects/arith.py | 30 ++++++++++++++-- mlir/test/python/dialects/arith_dialect.py | 40 ++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/mlir/python/mlir/dialects/arith.py b/mlir/python/mlir/dialects/arith.py index 61c6917393f1f9..92da5df9bce665 100644 --- a/mlir/python/mlir/dialects/arith.py +++ b/mlir/python/mlir/dialects/arith.py @@ -5,6 +5,8 @@ from ._arith_ops_gen import * from ._arith_ops_gen import _Dialect from ._arith_enum_gen import * +from array import array as _array +from typing import overload try: from ..ir import * @@ -43,13 +45,37 @@ def _is_float_type(type: Type): class ConstantOp(ConstantOp): """Specialization for the constant op class.""" + @overload + def __init__(self, value: Attribute, *, loc=None, ip=None): + ... 
+ + @overload def __init__( - self, result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None + self, result: Type, value: Union[int, float, _array], *, loc=None, ip=None ): + ... + + def __init__(self, result, value, *, loc=None, ip=None): + if value is None: + assert isinstance(result, Attribute) + super().__init__(result, loc=loc, ip=ip) + return + if isinstance(value, int): super().__init__(IntegerAttr.get(result, value), loc=loc, ip=ip) elif isinstance(value, float): super().__init__(FloatAttr.get(result, value), loc=loc, ip=ip) + elif isinstance(value, _array): + if 8 * value.itemsize != result.element_type.width: + raise ValueError( + f"Mismatching array element ({8 * value.itemsize}) and type ({result.element_type.width}) width." + ) + if value.typecode in ["i", "l", "q"]: + super().__init__(DenseIntElementsAttr.get(value, type=result)) + elif value.typecode in ["f", "d"]: + super().__init__(DenseFPElementsAttr.get(value, type=result)) + else: + raise ValueError(f'Unsupported typecode: "{value.typecode}".') else: super().__init__(value, loc=loc, ip=ip) @@ -79,6 +105,6 @@ def literal_value(self) -> Union[int, float]: def constant( - result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None + result: Type, value: Union[int, float, Attribute, _array], *, loc=None, ip=None ) -> Value: return _get_op_result_or_op_results(ConstantOp(result, value, loc=loc, ip=ip)) diff --git a/mlir/test/python/dialects/arith_dialect.py b/mlir/test/python/dialects/arith_dialect.py index 8bb80eed2b8105..c9af5e7b46db84 100644 --- a/mlir/test/python/dialects/arith_dialect.py +++ b/mlir/test/python/dialects/arith_dialect.py @@ -4,6 +4,7 @@ from mlir.ir import * import mlir.dialects.arith as arith import mlir.dialects.func as func +from array import array def run(f): @@ -92,3 +93,42 @@ def __str__(self): b = a * a # CHECK: ArithValue(%2 = arith.mulf %cst_1, %cst_1 : f64) print(b) + + +# CHECK-LABEL: TEST: testArrayConstantConstruction +@run +def 
testArrayConstantConstruction(): + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + i32_array = array("i", [1, 2, 3, 4]) + i32 = IntegerType.get_signless(32) + vec_i32 = VectorType.get([2, 2], i32) + arith.constant(vec_i32, i32_array) + arith.ConstantOp(vec_i32, DenseIntElementsAttr.get(i32_array, type=vec_i32)) + + # "q" is the equivalent of `long long` in C and requires at least + # 64 bit width integers on both Linux and Windows. + i64_array = array("q", [5, 6, 7, 8]) + i64 = IntegerType.get_signless(64) + vec_i64 = VectorType.get([1, 4], i64) + arith.constant(vec_i64, i64_array) + arith.ConstantOp(vec_i64, DenseIntElementsAttr.get(i64_array, type=vec_i64)) + + f32_array = array("f", [1.0, 2.0, 3.0, 4.0]) + f32 = F32Type.get() + vec_f32 = VectorType.get([4, 1], f32) + arith.constant(vec_f32, f32_array) + arith.ConstantOp(vec_f32, DenseFPElementsAttr.get(f32_array, type=vec_f32)) + + f64_array = array("d", [1.0, 2.0, 3.0, 4.0]) + f64 = F64Type.get() + vec_f64 = VectorType.get([2, 1, 2], f64) + arith.constant(vec_f64, f64_array) + arith.ConstantOp(vec_f64, DenseFPElementsAttr.get(f64_array, type=vec_f64)) + + # CHECK-COUNT-2: arith.constant dense<[{{\[}}1, 2], [3, 4]]> : vector<2x2xi32> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}5, 6, 7, 8]]> : vector<1x4xi64> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}1.000000e+00], [2.000000e+00], [3.000000e+00], [4.000000e+00]]> : vector<4x1xf32> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}[1.000000e+00, 2.000000e+00]], [{{\[}}3.000000e+00, 4.000000e+00]]]> : vector<2x1x2xf64> + print(module) From c03fd37d9b61bc6063e4d6e983846f877e83ac67 Mon Sep 17 00:00:00 2001 From: Anchu Rajendran S Date: Thu, 7 Mar 2024 08:23:58 -0800 Subject: [PATCH 049/158] [flang] Changes to map variables in link clause of declare target (#83643) As per the OpenMP standard, "If a variable appears in a link clause on a declare target directive that does not have a device_type clause with the 
nohost device-type-description then it is treated as if it had appeared in a map clause with a map-type of tofrom" is an implicit mapping rule. Before this change, such variables were mapped as to by default. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 16 +++- .../OpenMP/declare-target-link-tarop-cap.f90 | 55 +++++++++++++ .../declare-target-array-in-target-region.f90 | 33 -------- .../declare-target-vars-in-target-region.f90 | 81 +++++++++++++++++++ 4 files changed, 151 insertions(+), 34 deletions(-) create mode 100644 flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 delete mode 100644 openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 create mode 100644 openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 185e0316870e94..5cff95c7d125b0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1120,7 +1120,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, if (auto refType = baseOp.getType().dyn_cast()) eleType = refType.getElementType(); - if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + // If a variable is specified in declare target link and if device + // type is not specified as `nohost`, it needs to be mapped tofrom + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); + auto declareTargetOp = + llvm::dyn_cast_if_present(op); + if (declareTargetOp && declareTargetOp.isDeclareTarget()) { + if (declareTargetOp.getDeclareTargetCaptureClause() == + mlir::omp::DeclareTargetCaptureClause::link && + declareTargetOp.getDeclareTargetDeviceType() != + mlir::omp::DeclareTargetDeviceType::nohost) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + } else if (fir::isa_trivial(eleType) || 
fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; diff --git a/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 new file mode 100644 index 00000000000000..7cd0597161578d --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 @@ -0,0 +1,55 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s + +program test_link + + integer :: test_int = 1 + !$omp declare target link(test_int) + + integer :: test_array_1d(3) = (/1,2,3/) + !$omp declare target link(test_array_1d) + + integer, pointer :: test_ptr1 + !$omp declare target link(test_ptr1) + + integer, target :: test_target = 1 + !$omp declare target link(test_target) + + integer, pointer :: test_ptr2 + !$omp declare target link(test_ptr2) + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref {name = "test_int"} + !$omp target + test_int = test_int + 1 + !$omp end target + + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref> {name = "test_array_1d"} + !$omp target + do i = 1,3 + test_array_1d(i) = i * 2 + end do + !$omp end target + + allocate(test_ptr1) + test_ptr1 = 1 + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>>, !fir.box>) map_clauses(implicit, tofrom) capture(ByRef) members({{%.*}} : !fir.llvm_ptr>) -> !fir.ref>> {name = "test_ptr1"} + !$omp target + test_ptr1 = test_ptr1 + 1 + !$omp end target + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref, 
i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref {name = "test_target"} + !$omp target + test_target = test_target + 1 + !$omp end target + + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>>, !fir.box>) map_clauses(implicit, tofrom) capture(ByRef) members({{%.*}} : !fir.llvm_ptr>) -> !fir.ref>> {name = "test_ptr2"} + test_ptr2 => test_target + !$omp target + test_ptr2 = test_ptr2 + 1 + !$omp end target + +end diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 deleted file mode 100644 index c09146198768b0..00000000000000 --- a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 +++ /dev/null @@ -1,33 +0,0 @@ -! Offloading test with a target region mapping a declare target -! Fortran array writing some values to it and checking the host -! correctly receives the updates made on the device. -! REQUIRES: flang -! UNSUPPORTED: nvptx64-nvidia-cuda-LTO -! UNSUPPORTED: aarch64-unknown-linux-gnu -! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -! UNSUPPORTED: x86_64-pc-linux-gnu -! UNSUPPORTED: x86_64-pc-linux-gnu-LTO - -! RUN: %libomptarget-compile-fortran-run-and-check-generic -module test_0 - implicit none - INTEGER :: sp(10) = (/0,0,0,0,0,0,0,0,0,0/) - !$omp declare target link(sp) -end module test_0 - -program main - use test_0 - integer :: i = 1 - integer :: j = 11 -!$omp target map(tofrom:sp, i, j) - do while (i <= j) - sp(i) = i; - i = i + 1 - end do -!$omp end target - -PRINT *, sp(:) - -end program - -! 
CHECK: 1 2 3 4 5 6 7 8 9 10 diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 new file mode 100644 index 00000000000000..f524deac3bcce9 --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 @@ -0,0 +1,81 @@ +! Offloading test with a target region mapping a declare target +! Fortran array writing some values to it and checking the host +! correctly receives the updates made on the device. +! REQUIRES: flang +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +module test_0 + implicit none + INTEGER :: arr1(10) = (/0,0,0,0,0,0,0,0,0,0/) + INTEGER :: arr2(10) = (/0,0,0,0,0,0,0,0,0,0/) + !$omp declare target link(arr1) enter(arr2) + INTEGER :: scalar = 1 + !$omp declare target link(scalar) +end module test_0 + +subroutine test_with_array_link_and_tofrom() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(tofrom:arr1, i, j) + do while (i <= j) + arr1(i) = i; + i = i + 1 + end do + !$omp end target + + ! CHECK: 1 2 3 4 5 6 7 8 9 10 + PRINT *, arr1(:) +end subroutine test_with_array_link_and_tofrom + +subroutine test_with_array_link_only() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(i, j) + do while (i <= j) + arr1(i) = i + 1; + i = i + 1 + end do + !$omp end target + + ! CHECK: 2 3 4 5 6 7 8 9 10 11 + PRINT *, arr1(:) +end subroutine test_with_array_link_only + +subroutine test_with_array_enter_only() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(i, j) + do while (i <= j) + arr2(i) = i + 1; + i = i + 1 + end do + !$omp end target + + ! 
CHECK: 0 0 0 0 0 0 0 0 0 0 + PRINT *, arr2(:) +end subroutine test_with_array_enter_only + +subroutine test_with_scalar_link_only() + use test_0 + !$omp target + scalar = 10 + !$omp end target + + ! CHECK: 10 + PRINT *, scalar +end subroutine test_with_scalar_link_only + +program main + call test_with_array_link_and_tofrom() + call test_with_array_link_only() + call test_with_array_enter_only() + call test_with_scalar_link_only() +end program From 904a6aedca422d43f4f893bb97b2990e86b909e4 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 7 Mar 2024 08:31:16 -0800 Subject: [PATCH 050/158] [SLP][NFC]Add lshr version of the test with casting, NFC. --- .../X86/reorder-possible-strided-node.ll | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index b9ef17c49b7514..6f5d3d3785e0c8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -54,3 +54,57 @@ entry: store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4 ret void } + +define void @test1() { +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: 
[[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 +; CHECK-NEXT: ret void +; +entry: + %arrayidx1 = getelementptr i32, ptr null, i64 1 + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr i32, ptr null, i64 63 + %1 = load i32, ptr %arrayidx2, align 4 + %mul = mul i32 %1, %0 + %conv = sext i32 %mul to i64 + %shr = lshr i64 %conv, 0 + %conv3 = trunc i64 %shr to i32 + store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 + %arrayidx5 = getelementptr i32, ptr null, i64 33 + %2 = load i32, ptr %arrayidx5, align 4 + %arrayidx6 = getelementptr i32, ptr null, i64 62 + %3 = load i32, ptr %arrayidx6, align 4 + %mul7 = mul i32 %3, %2 + %conv8 = sext i32 %mul7 to i64 + %shr10 = lshr i64 %conv8, 0 + %conv11 = trunc i64 %shr10 to i32 + store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4 + %arrayidx13 = getelementptr i32, ptr null, i64 7 + %4 = load i32, ptr %arrayidx13, align 4 + %arrayidx14 = getelementptr i32, ptr null, i64 61 + %5 = load i32, ptr %arrayidx14, align 4 + %mul15 = mul i32 %5, %4 + %conv16 = sext i32 %mul15 to i64 + %shr18 = lshr i64 %conv16, 0 + %conv19 = trunc i64 %shr18 to i32 + store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8 + %6 = load i32, ptr null, align 4 + %arrayidx22 = getelementptr i32, ptr null, i64 60 + %7 = load i32, ptr %arrayidx22, align 4 + %mul23 = mul i32 %7, %6 + %conv24 = sext i32 %mul23 to i64 + %shr26 = lshr i64 %conv24, 0 + %conv27 = trunc i64 %shr26 to i32 + store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4 + ret void +} From 101a13df71734b06116846a3a39c0880eb33456d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 7 Mar 2024 08:38:04 -0800 Subject: [PATCH 051/158] [libc][stdbit] implement stdc_bit_floor (C23) (#84233) --- 
libc/config/linux/x86_64/entrypoints.txt | 5 +++++ libc/docs/stdbit.rst | 12 +++++----- libc/include/llvm-libc-macros/stdbit-macros.h | 20 +++++++++++++++++ libc/spec/stdc.td | 10 +++++++-- libc/src/__support/CPP/bit.h | 2 +- libc/src/stdbit/CMakeLists.txt | 1 + libc/src/stdbit/stdc_bit_floor_uc.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_bit_floor_uc.h | 18 +++++++++++++++ libc/src/stdbit/stdc_bit_floor_ui.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_bit_floor_ui.h | 18 +++++++++++++++ libc/src/stdbit/stdc_bit_floor_ul.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_bit_floor_ul.h | 18 +++++++++++++++ libc/src/stdbit/stdc_bit_floor_ull.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_bit_floor_ull.h | 18 +++++++++++++++ libc/src/stdbit/stdc_bit_floor_us.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_bit_floor_us.h | 18 +++++++++++++++ libc/test/include/stdbit_test.cpp | 17 ++++++++++++++ libc/test/src/stdbit/CMakeLists.txt | 1 + .../src/stdbit/stdc_bit_floor_uc_test.cpp | 22 +++++++++++++++++++ .../src/stdbit/stdc_bit_floor_ui_test.cpp | 21 ++++++++++++++++++ .../src/stdbit/stdc_bit_floor_ul_test.cpp | 21 ++++++++++++++++++ .../src/stdbit/stdc_bit_floor_ull_test.cpp | 21 ++++++++++++++++++ .../src/stdbit/stdc_bit_floor_us_test.cpp | 22 +++++++++++++++++++ 23 files changed, 357 insertions(+), 9 deletions(-) create mode 100644 libc/src/stdbit/stdc_bit_floor_uc.cpp create mode 100644 libc/src/stdbit/stdc_bit_floor_uc.h create mode 100644 libc/src/stdbit/stdc_bit_floor_ui.cpp create mode 100644 libc/src/stdbit/stdc_bit_floor_ui.h create mode 100644 libc/src/stdbit/stdc_bit_floor_ul.cpp create mode 100644 libc/src/stdbit/stdc_bit_floor_ul.h create mode 100644 libc/src/stdbit/stdc_bit_floor_ull.cpp create mode 100644 libc/src/stdbit/stdc_bit_floor_ull.h create mode 100644 libc/src/stdbit/stdc_bit_floor_us.cpp create mode 100644 libc/src/stdbit/stdc_bit_floor_us.h create mode 100644 libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp create mode 100644 
libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp create mode 100644 libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp create mode 100644 libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp create mode 100644 libc/test/src/stdbit/stdc_bit_floor_us_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 705ec10960c4d2..bd2006ddb7e985 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -152,6 +152,11 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_bit_width_ui libc.src.stdbit.stdc_bit_width_ul libc.src.stdbit.stdc_bit_width_ull + libc.src.stdbit.stdc_bit_floor_uc + libc.src.stdbit.stdc_bit_floor_us + libc.src.stdbit.stdc_bit_floor_ui + libc.src.stdbit.stdc_bit_floor_ul + libc.src.stdbit.stdc_bit_floor_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst index ccd1393ef5d926..3ec46cf8d8ffa4 100644 --- a/libc/docs/stdbit.rst +++ b/libc/docs/stdbit.rst @@ -91,11 +91,11 @@ stdc_bit_width_us |check| stdc_bit_width_ui |check| stdc_bit_width_ul |check| stdc_bit_width_ull |check| -stdc_bit_floor_uc -stdc_bit_floor_us -stdc_bit_floor_ui -stdc_bit_floor_ul -stdc_bit_floor_ull +stdc_bit_floor_uc |check| +stdc_bit_floor_us |check| +stdc_bit_floor_ui |check| +stdc_bit_floor_ul |check| +stdc_bit_floor_ull |check| stdc_bit_ceil_uc stdc_bit_ceil_us stdc_bit_ceil_ui @@ -126,7 +126,7 @@ stdc_count_zeros |check| stdc_count_ones |check| stdc_has_single_bit |check| stdc_bit_width |check| -stdc_bit_floor +stdc_bit_floor |check| stdc_bit_ceil ========================= ========= diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h b/libc/include/llvm-libc-macros/stdbit-macros.h index 104418ca4856ba..5b51068f866b71 100644 --- a/libc/include/llvm-libc-macros/stdbit-macros.h +++ b/libc/include/llvm-libc-macros/stdbit-macros.h @@ -181,6 +181,19 @@ inline unsigned stdc_bit_width(unsigned long x) { return stdc_bit_width_ul(x); } inline 
unsigned stdc_bit_width(unsigned long long x) { return stdc_bit_width_ull(x); } +inline unsigned char stdc_bit_floor(unsigned char x) { + return stdc_bit_floor_uc(x); +} +inline unsigned short stdc_bit_floor(unsigned short x) { + return stdc_bit_floor_us(x); +} +inline unsigned stdc_bit_floor(unsigned x) { return stdc_bit_floor_ui(x); } +inline unsigned long stdc_bit_floor(unsigned long x) { + return stdc_bit_floor_ul(x); +} +inline unsigned long long stdc_bit_floor(unsigned long long x) { + return stdc_bit_floor_ull(x); +} #else #define stdc_leading_zeros(x) \ _Generic((x), \ @@ -266,6 +279,13 @@ inline unsigned stdc_bit_width(unsigned long long x) { unsigned: stdc_bit_width_ui, \ unsigned long: stdc_bit_width_ul, \ unsigned long long: stdc_bit_width_ull)(x) +#define stdc_bit_floor(x) \ + _Generic((x), \ + unsigned char: stdc_bit_floor_ui, \ + unsigned short: stdc_bit_floor_us, \ + unsigned: stdc_bit_floor_ui, \ + unsigned long: stdc_bit_floor_ul, \ + unsigned long long: stdc_bit_floor_ull)(x) #endif // __cplusplus #endif // __LLVM_LIBC_MACROS_STDBIT_MACROS_H diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index cfebc60a0a9a8e..a3a856a4ec8507 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -801,7 +801,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"stdc_count_zeros">, Macro<"stdc_count_ones">, Macro<"stdc_has_single_bit">, - Macro<"std_bit_width"> + Macro<"std_bit_width">, + Macro<"std_bit_floor"> ], // Macros [], // Types [], // Enumerations @@ -860,7 +861,12 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"stdc_bit_width_us", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_bit_width_ui", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_bit_width_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_ull", RetValSpec, [ArgSpec]> + FunctionSpec<"stdc_bit_width_ull", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_uc", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_us", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ui", RetValSpec, 
[ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ul", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ull", RetValSpec, [ArgSpec]> ] // Functions >; diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index bc2f595845a95f..6b625b0c97a365 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -178,7 +178,7 @@ template bit_floor(T value) { if (!value) return 0; - return T(1) << (cpp::bit_width(value) - 1); + return static_cast(T(1) << (cpp::bit_width(value) - 1)); } /// Returns the smallest integral power of two no smaller than value if value is diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt index f077baeee6d275..7ab4fee4454a15 100644 --- a/libc/src/stdbit/CMakeLists.txt +++ b/libc/src/stdbit/CMakeLists.txt @@ -11,6 +11,7 @@ set(prefixes count_ones has_single_bit bit_width + bit_floor ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/src/stdbit/stdc_bit_floor_uc.cpp b/libc/src/stdbit/stdc_bit_floor_uc.cpp new file mode 100644 index 00000000000000..6cb04c9eb43e62 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_uc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_uc -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_uc.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned char, stdc_bit_floor_uc, (unsigned char value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_uc.h b/libc/src/stdbit/stdc_bit_floor_uc.h new file mode 100644 index 00000000000000..d6f53c5f699797 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_uc.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_uc -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H + +namespace LIBC_NAMESPACE { + +unsigned char stdc_bit_floor_uc(unsigned char value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H diff --git a/libc/src/stdbit/stdc_bit_floor_ui.cpp b/libc/src/stdbit/stdc_bit_floor_ui.cpp new file mode 100644 index 00000000000000..149b63f190cf37 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ui.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_ui -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ui.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_bit_floor_ui, (unsigned value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ui.h b/libc/src/stdbit/stdc_bit_floor_ui.h new file mode 100644 index 00000000000000..fcc606386f86d3 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ui.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ui -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_bit_floor_ui(unsigned value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H diff --git a/libc/src/stdbit/stdc_bit_floor_ul.cpp b/libc/src/stdbit/stdc_bit_floor_ul.cpp new file mode 100644 index 00000000000000..a29a044545684e --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ul.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_ul -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ul.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned long, stdc_bit_floor_ul, (unsigned long value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ul.h b/libc/src/stdbit/stdc_bit_floor_ul.h new file mode 100644 index 00000000000000..08327aa60c9069 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ul -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H + +namespace LIBC_NAMESPACE { + +unsigned long stdc_bit_floor_ul(unsigned long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H diff --git a/libc/src/stdbit/stdc_bit_floor_ull.cpp b/libc/src/stdbit/stdc_bit_floor_ull.cpp new file mode 100644 index 00000000000000..d1084b63573227 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_bit_floor_ull ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ull.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned long long, stdc_bit_floor_ull, + (unsigned long long value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ull.h b/libc/src/stdbit/stdc_bit_floor_ull.h new file mode 100644 index 00000000000000..8f360b23855ad6 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ull.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ull ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H + +namespace LIBC_NAMESPACE { + +unsigned long long stdc_bit_floor_ull(unsigned long long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H diff --git a/libc/src/stdbit/stdc_bit_floor_us.cpp b/libc/src/stdbit/stdc_bit_floor_us.cpp new file mode 100644 index 00000000000000..d1357a980e3a8a --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_us.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_us -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_us.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned short, stdc_bit_floor_us, (unsigned short value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_us.h b/libc/src/stdbit/stdc_bit_floor_us.h new file mode 100644 index 00000000000000..fcd0b9e3c549a1 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_us.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_us -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H + +namespace LIBC_NAMESPACE { + +unsigned short stdc_bit_floor_us(unsigned short value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index dfb7c97e3d9ee0..20820d52fbdede 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -91,6 +91,13 @@ unsigned stdc_bit_width_us(unsigned short) noexcept { return 0x4BU; } unsigned stdc_bit_width_ui(unsigned) noexcept { return 0x4CU; } unsigned stdc_bit_width_ul(unsigned long) noexcept { return 0x4DU; } unsigned stdc_bit_width_ull(unsigned long long) noexcept { return 0x4EU; } +unsigned char stdc_bit_floor_uc(unsigned char) noexcept { return 0x5AU; } +unsigned short stdc_bit_floor_us(unsigned short) noexcept { return 0x5BU; } +unsigned 
stdc_bit_floor_ui(unsigned) noexcept { return 0x5CU; } +unsigned long stdc_bit_floor_ul(unsigned long) noexcept { return 0x5DU; } +unsigned long long stdc_bit_floor_ull(unsigned long long) noexcept { + return 0x5EU; +} } #include "include/llvm-libc-macros/stdbit-macros.h" @@ -190,3 +197,13 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroBitWidth) { EXPECT_EQ(stdc_bit_width(1UL), 0x4DU); EXPECT_EQ(stdc_bit_width(1ULL), 0x4EU); } + +TEST(LlvmLibcStdbitTest, TypeGenericMacroBitFloor) { + EXPECT_EQ(stdc_bit_floor(static_cast(0U)), + static_cast(0x5AU)); + EXPECT_EQ(stdc_bit_floor(static_cast(0U)), + static_cast(0x5BU)); + EXPECT_EQ(stdc_bit_floor(0U), 0x5CU); + EXPECT_EQ(stdc_bit_floor(0UL), 0x5DUL); + EXPECT_EQ(stdc_bit_floor(0ULL), 0x5EULL); +} diff --git a/libc/test/src/stdbit/CMakeLists.txt b/libc/test/src/stdbit/CMakeLists.txt index f7e17d73229935..3aed56c0e92380 100644 --- a/libc/test/src/stdbit/CMakeLists.txt +++ b/libc/test/src/stdbit/CMakeLists.txt @@ -13,6 +13,7 @@ set(prefixes count_ones has_single_bit bit_width + bit_floor ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp new file mode 100644 index 00000000000000..254abd043d6e0c --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp @@ -0,0 +1,22 @@ +//===-- Unittests for stdc_bit_floor_uc -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_uc.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUcTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_uc(0U), + static_cast(0)); +} + +TEST(LlvmLibcStdcBitfloorUcTest, Ones) { + for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_uc(UCHAR_MAX >> i), + static_cast(1 << (UCHAR_WIDTH - i - 1))); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp new file mode 100644 index 00000000000000..53790402a9bda9 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ui -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ui.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUiTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ui(0U), 0U); +} + +TEST(LlvmLibcStdcBitfloorUiTest, Ones) { + for (unsigned i = 0U; i != INT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ui(UINT_MAX >> i), + 1U << (UINT_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp new file mode 100644 index 00000000000000..1c574437e02b79 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ul -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ul.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUlTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ul(0UL), 0UL); +} + +TEST(LlvmLibcStdcBitfloorUlTest, Ones) { + for (unsigned i = 0U; i != ULONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ul(ULONG_MAX >> i), + 1UL << (ULONG_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp new file mode 100644 index 00000000000000..4717d427a40a72 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ull ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ull.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUllTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ull(0ULL), 0ULL); +} + +TEST(LlvmLibcStdcBitfloorUllTest, Ones) { + for (unsigned i = 0U; i != ULLONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ull(ULLONG_MAX >> i), + 1ULL << (ULLONG_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp new file mode 100644 index 00000000000000..4df87fb079ba76 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp @@ -0,0 +1,22 @@ +//===-- Unittests for stdc_bit_floor_us -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_us.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUsTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_us(0U), + static_cast(0)); +} + +TEST(LlvmLibcStdcBitfloorUsTest, Ones) { + for (unsigned i = 0U; i != USHRT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_us(USHRT_MAX >> i), + static_cast(1 << (USHRT_WIDTH - i - 1))); +} From b1f2e19dc27d339cbeb3373066e73a4c91d133dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 7 Mar 2024 17:29:42 +0100 Subject: [PATCH 052/158] [clang][Interp][NFC] Use ArrayElem{,Pop} ops more often Instead of the longer ArrayElemPtr + Load. 
--- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 57 +++++++++--------------- clang/lib/AST/Interp/Interp.h | 14 ++++++ clang/lib/AST/Interp/Opcodes.td | 8 ++++ 3 files changed, 42 insertions(+), 37 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 8872579e12dc82..712218f5de2e42 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -314,11 +314,7 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { for (unsigned I = 0; I != 2; ++I) { if (!this->emitGetLocal(PT_Ptr, *SubExprOffset, CE)) return false; - if (!this->emitConstUint8(I, CE)) - return false; - if (!this->emitArrayElemPtrPopUint8(CE)) - return false; - if (!this->emitLoadPop(SourceElemT, CE)) + if (!this->emitArrayElemPop(SourceElemT, I, CE)) return false; // Do the cast. @@ -729,11 +725,8 @@ bool ByteCodeExprGen::VisitComplexBinOp(const BinaryOperator *E) { if (IsComplex) { if (!this->emitGetLocal(PT_Ptr, Offset, E)) return false; - if (!this->emitConstUint8(ElemIndex, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; - return this->emitLoadPop(classifyComplexElementType(E->getType()), E); + return this->emitArrayElemPop(classifyComplexElementType(E->getType()), + ElemIndex, E); } if (ElemIndex == 0) return this->emitGetLocal(classifyPrim(E->getType()), Offset, E); @@ -3127,16 +3120,16 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { if (!this->visit(SubExpr)) return false; - if (!this->emitConstUint8(1, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; + + if (SubExpr->isLValue()) { + if (!this->emitConstUint8(1, E)) + return false; + return this->emitArrayElemPtrPopUint8(E); + } // Since our _Complex implementation does not map to a primitive type, // we sometimes have to do the lvalue-to-rvalue conversion here manually. 
- if (!SubExpr->isLValue()) - return this->emitLoadPop(classifyPrim(E->getType()), E); - return true; + return this->emitArrayElemPop(classifyPrim(E->getType()), 1, E); } case UO_Extension: return this->delegate(SubExpr); @@ -3347,17 +3340,15 @@ bool ByteCodeExprGen::emitComplexReal(const Expr *SubExpr) { if (!this->visit(SubExpr)) return false; - if (!this->emitConstUint8(0, SubExpr)) - return false; - if (!this->emitArrayElemPtrPopUint8(SubExpr)) - return false; + if (SubExpr->isLValue()) { + if (!this->emitConstUint8(0, SubExpr)) + return false; + return this->emitArrayElemPtrPopUint8(SubExpr); + } - // Since our _Complex implementation does not map to a primitive type, - // we sometimes have to do the lvalue-to-rvalue conversion here manually. - if (!SubExpr->isLValue()) - return this->emitLoadPop(classifyComplexElementType(SubExpr->getType()), - SubExpr); - return true; + // Rvalue, load the actual element. + return this->emitArrayElemPop(classifyComplexElementType(SubExpr->getType()), + 0, SubExpr); } template @@ -3366,11 +3357,7 @@ bool ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { PrimType ElemT = classifyComplexElementType(E->getType()); // We emit the expression (__real(E) != 0 || __imag(E) != 0) // for us, that means (bool)E[0] || (bool)E[1] - if (!this->emitConstUint8(0, E)) - return false; - if (!this->emitArrayElemPtrUint8(E)) - return false; - if (!this->emitLoadPop(ElemT, E)) + if (!this->emitArrayElem(ElemT, 0, E)) return false; if (ElemT == PT_Float) { if (!this->emitCastFloatingIntegral(PT_Bool, E)) @@ -3385,11 +3372,7 @@ bool ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { if (!this->jumpTrue(LabelTrue)) return false; - if (!this->emitConstUint8(1, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; - if (!this->emitLoadPop(ElemT, E)) + if (!this->emitArrayElemPop(ElemT, 1, E)) return false; if (ElemT == PT_Float) { if (!this->emitCastFloatingIntegral(PT_Bool, E)) diff --git a/clang/lib/AST/Interp/Interp.h 
b/clang/lib/AST/Interp/Interp.h index 43cbc2ff292c09..bb220657c2dadc 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1959,10 +1959,24 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) { return NarrowPtr(S, OpPC); } +template ::T> +inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) { + const Pointer &Ptr = S.Stk.peek(); + + if (!CheckLoad(S, OpPC, Ptr)) + return false; + + S.Stk.push(Ptr.atIndex(Index).deref()); + return true; +} + template ::T> inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) { const Pointer &Ptr = S.Stk.pop(); + if (!CheckLoad(S, OpPC, Ptr)) + return false; + S.Stk.push(Ptr.atIndex(Index).deref()); return true; } diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index ffc54646f0279e..9b99aa0ccb558a 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -368,6 +368,14 @@ def ArrayElemPop : Opcode { let HasGroup = 1; } +def ArrayElem : Opcode { + let Args = [ArgUint32]; + let Types = [AllTypeClass]; + let HasGroup = 1; +} + + + //===----------------------------------------------------------------------===// // Direct field accessors //===----------------------------------------------------------------------===// From 5c752df1e10b7af0684e549601f0a8dccffcfcf0 Mon Sep 17 00:00:00 2001 From: Gheorghe-Teodor Bercea Date: Thu, 7 Mar 2024 12:01:42 -0500 Subject: [PATCH 053/158] [libomptarget][nextgen-plugin][NFC] Clean-up InputSignal checks (#83458) Clean-up InputSignal checks. 
--- .../plugins-nextgen/amdgpu/src/rtl.cpp | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 81634ae1edc490..fce7454bf2800d 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -715,16 +715,12 @@ struct AMDGPUQueueTy { std::lock_guard Lock(Mutex); assert(Queue && "Interacted with a non-initialized queue!"); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Add a barrier packet before the kernel packet in case there is a pending // preceding operation. The barrier packet will delay the processing of // subsequent queue's packets until the barrier input signal are satisfied. // No need output signal needed because the dependency is already guaranteed // by the queue barrier itself. - if (InputSignal) + if (InputSignal && InputSignal->load()) if (auto Err = pushBarrierImpl(nullptr, InputSignal)) return Err; @@ -1254,12 +1250,8 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Issue the async memory copy. - if (InputSignal) { + if (InputSignal && InputSignal->load()) { hsa_signal_t InputSignalRaw = InputSignal->get(); return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Src, Agent, CopySize, 1, &InputSignalRaw, @@ -1293,17 +1285,13 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignals[0]); - // Avoid defining the input dependency if already satisfied. 
- if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Setup the post action for releasing the intermediate buffer. if (auto Err = Slots[Curr].schedReleaseBuffer(Inter, MemoryManager)) return Err; // Issue the first step: device to host transfer. Avoid defining the input // dependency if already satisfied. - if (InputSignal) { + if (InputSignal && InputSignal->load()) { hsa_signal_t InputSignalRaw = InputSignal->get(); if (auto Err = utils::asyncMemCopy( UseMultipleSdmaEngines, Inter, Agent, Src, Agent, CopySize, 1, @@ -1361,12 +1349,8 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Issue the first step: host to host transfer. - if (InputSignal) { + if (InputSignal && InputSignal->load()) { // The std::memcpy is done asynchronously using an async handler. We store // the function's information in the action but it is not actually a // post action. @@ -1429,10 +1413,6 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // The agents need to have access to the corresponding memory // This is presently only true if the pointers were originally // allocated by this runtime or the caller made the appropriate From d1fc59c3b5c5ce292a6060d7a5545094cdf1b5fc Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 7 Mar 2024 17:04:12 +0000 Subject: [PATCH 054/158] [mlir][ArmSME] Rewrite illegal `shape_casts` to `vector.transpose` ops (#82985) This adds a rewrite that converts illegal 2D unit-dim `shape_casts` into `vector.transpose` ops. E.g. 
```mlir // Case 1: %a = vector.shape_cast %0 : vector<[4]x1xf32> to vector<1x[4]xf32> // Case 2: %b = vector.shape_cast %1 : vector<[4]x1xf32> to vector<[4]xf32> ``` Becomes: ```mlir // Case 1: %a = vector.transpose %0 : [1, 0] vector<[4]x1xf32> to vector<1x[4]xf32> // Case 2: %t = vector.transpose %1 : [1, 0] vector<[4]x1xf32> to vector<1x[4]xf32> %b = vector.shape_cast %t : vector<1x[4]xf32> to vector<[4]xf32> ``` Various lowerings and drop unit-dims patterns add such shape_casts, however, if they do not cancel out (which they likely won't if we've reached the vector-legalization pass) they will prevent lowering the IR. Rewriting them as a transpose gives `LiftIllegalVectorTransposeToMemory` a chance to eliminate the illegal types. --- .../ArmSME/Transforms/VectorLegalization.cpp | 85 ++++++++++++++++--- .../Dialect/ArmSME/vector-legalization.mlir | 45 ++++++++++ 2 files changed, 116 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp index 11f8bc04b21844..31500c62c0d600 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp @@ -46,6 +46,8 @@ static constexpr StringLiteral kMatchFailureUnsupportedMaskOp( "op mask is unsupported for legalization/decomposition"); static constexpr StringLiteral kMatchFailureNonPermutationMap("op affine map is not a permutation"); +static constexpr StringLiteral kMatchFailureNotIllegalToLegal( + "expected transpose from illegal type to legal type"); /// An SMESubTile represents a single SME-sized sub-tile from decomposing a /// larger vector type. The (`row`, `col`) are the position of the tile in the @@ -416,6 +418,17 @@ struct FoldExtractFromVectorOfSMELikeCreateMasks } }; +/// A vector type where no fixed dimension comes after a scalable dimension. 
+bool isLegalVectorType(VectorType vType) { + bool seenFixedDim = false; + for (bool scalableFlag : llvm::reverse(vType.getScalableDims())) { + seenFixedDim |= !scalableFlag; + if (seenFixedDim && scalableFlag) + return false; + } + return true; +} + /// Lifts an illegal vector.transpose and vector.transfer_read to a /// memref.subview + memref.transpose, followed by a legal read. /// @@ -448,16 +461,6 @@ struct LiftIllegalVectorTransposeToMemory : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - static bool isIllegalVectorType(VectorType vType) { - bool seenFixedDim = false; - for (bool scalableFlag : llvm::reverse(vType.getScalableDims())) { - seenFixedDim |= !scalableFlag; - if (seenFixedDim && scalableFlag) - return true; - } - return false; - } - static Value getExtensionSource(Operation *op) { if (isa_and_present(op)) return op->getOperand(0); @@ -468,9 +471,9 @@ struct LiftIllegalVectorTransposeToMemory PatternRewriter &rewriter) const override { auto sourceType = transposeOp.getSourceVectorType(); auto resultType = transposeOp.getResultVectorType(); - if (!isIllegalVectorType(sourceType) || isIllegalVectorType(resultType)) - return rewriter.notifyMatchFailure( - transposeOp, "expected transpose from illegal type to legal type"); + if (isLegalVectorType(sourceType) || !isLegalVectorType(resultType)) + return rewriter.notifyMatchFailure(transposeOp, + kMatchFailureNotIllegalToLegal); // Look through extend for transfer_read. Value maybeRead = transposeOp.getVector(); @@ -556,6 +559,59 @@ struct LiftIllegalVectorTransposeToMemory } }; +/// A rewrite to turn unit dim transpose-like vector.shape_casts into +/// vector.transposes. The shape_cast has to be from an illegal vector type to a +/// legal one (as defined by isLegalVectorType). +/// +/// The reasoning for this is if we've got to this pass and we still have +/// shape_casts of illegal types, then they likely will not cancel out. 
Turning +/// them into transposes gives LiftIllegalVectorTransposeToMemory a chance to +/// eliminate them. +/// +/// Example: +/// +/// BEFORE: +/// ```mlir +/// %0 = vector.shape_cast %a : vector<[4]x1xf32> to vector<1x[4]xf32> +/// ``` +/// +/// AFTER: +/// ```mlir +/// %0 = vector.transpose %0, [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> +/// ``` +struct ConvertIllegalShapeCastOpsToTransposes + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ShapeCastOp shapeCastOp, + PatternRewriter &rewriter) const override { + auto sourceType = shapeCastOp.getSourceVectorType(); + auto resultType = shapeCastOp.getResultVectorType(); + if (isLegalVectorType(sourceType) || !isLegalVectorType(resultType)) + return rewriter.notifyMatchFailure(shapeCastOp, + kMatchFailureNotIllegalToLegal); + + // Note: If we know that `sourceType` is an illegal vector type (and 2D) + // then dim 0 is scalable and dim 1 is fixed. + if (sourceType.getRank() != 2 || sourceType.getDimSize(1) != 1) + return rewriter.notifyMatchFailure( + shapeCastOp, "expected source to be a 2D scalable vector with a " + "trailing unit dim"); + + auto loc = shapeCastOp.getLoc(); + auto transpose = rewriter.create( + loc, shapeCastOp.getSource(), ArrayRef{1, 0}); + + if (resultType.getRank() == 1) + rewriter.replaceOpWithNewOp(shapeCastOp, resultType, + transpose); + else + rewriter.replaceOp(shapeCastOp, transpose); + + return success(); + } +}; + struct VectorLegalizationPass : public arm_sme::impl::VectorLegalizationBase { void runOnOperation() override { @@ -576,7 +632,8 @@ struct VectorLegalizationPass }); patterns.add(context); + LiftIllegalVectorTransposeToMemory, + ConvertIllegalShapeCastOpsToTransposes>(context); // Note: High benefit to ensure masked outer products are lowered first. 
patterns.add( converter, context, 1024); diff --git a/mlir/test/Dialect/ArmSME/vector-legalization.mlir b/mlir/test/Dialect/ArmSME/vector-legalization.mlir index bf0b58ff4cf073..f8be697548c197 100644 --- a/mlir/test/Dialect/ArmSME/vector-legalization.mlir +++ b/mlir/test/Dialect/ArmSME/vector-legalization.mlir @@ -388,3 +388,48 @@ func.func @illegal_transpose_no_defining_source_op(%vec: vector<[4]x1xf32>) -> v %0 = vector.transpose %vec, [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> return %0 : vector<1x[4]xf32> } + +// ----- + +// CHECK-LABEL: @illegal_shape_cast_to_transpose_2d( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) +func.func @illegal_shape_cast_to_transpose_2d(%vec: vector<[4]x1xf32>) -> vector<1x[4]xf32> { + // CHECK: vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> + %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<1x[4]xf32> + return %0 : vector<1x[4]xf32> +} + +// ----- + +// CHECK-LABEL: @illegal_shape_cast_to_transpose_1d( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) +func.func @illegal_shape_cast_to_transpose_1d(%vec: vector<[4]x1xf32>) -> vector<[4]xf32> { + // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> + // CHECK: vector.shape_cast %[[TRANSPOSE]] : vector<1x[4]xf32> to vector<[4]xf32> + %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<[4]xf32> + return %0 : vector<[4]xf32> +} + +// ----- + +// CHECK-LABEL: @lift_illegal_2d_shape_cast_to_memory +func.func @lift_illegal_2d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<1x[4]xf32> { + // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> + // CHECK-NOT: vector.shape_cast + %pad = arith.constant 0.0 : f32 + %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> + %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<1x[4]xf32> + return %cast : vector<1x[4]xf32> +} + +// ----- + +// 
CHECK-LABEL: @lift_illegal_1d_shape_cast_to_memory +func.func @lift_illegal_1d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<[4]xf32> { + // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> + // CHECK-NOT: vector.shape_cast {{.*}} : vector<[4]x1xf32> to vector<[4]xf32> + %pad = arith.constant 0.0 : f32 + %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> + %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<[4]xf32> + return %cast : vector<[4]xf32> +} From 630289f77d67703673928ae38d3e5ba900e9ff62 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 11:04:40 -0600 Subject: [PATCH 055/158] [HIP] Do not include the CUID module hash with the new driver (#84332) Summary: The new driver does not need this hash and it can lead to redefined symbol errors when the CUID hash isn't set. --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d02875c6a86d77..967319bdfc4571 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -916,7 +916,7 @@ void CodeGenModule::Release() { llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external"); addCompilerUsedGlobal(GV); } - if (LangOpts.HIP) { + if (LangOpts.HIP && !getLangOpts().OffloadingNewDriver) { // Emit a unique ID so that host and device binaries from the same // compilation unit can be associated. auto *GV = new llvm::GlobalVariable( From 8f79cdd8da97c131ae7d8a3210bb69cb6654903d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Mar 2024 17:02:23 +0000 Subject: [PATCH 056/158] [AArch64] Add -verify-machineinstrs to a test This would have helped identify problems with #83905 which only showed up in an LLVM_ENABLE_EXPENSIVE_CHECKS build. 
--- llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir index f50bd9ab4b8a1b..f2d79bd7206908 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir +++ b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -o - | FileCheck %s +# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -verify-machineinstrs -o - | FileCheck %s --- | target triple = "aarch64-linux" From 96049fcf4e5f2eb0271bdfa89e113eef9c5fa9f6 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 7 Mar 2024 09:50:29 -0500 Subject: [PATCH 057/158] [GISEL] Add IRTranslation for shufflevector on scalable vector types (#80378) Recommits llvm/llvm-project#80378 which was reverted in llvm/llvm-project#84330. The problem was that the change in llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir used 217 as an opcode instead of a regex. 
--- llvm/docs/GlobalISel/GenericOpcode.rst | 5 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 12 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 7 + llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 4 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 27 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 16 +- llvm/lib/CodeGen/MachineVerifier.cpp | 18 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- .../GlobalISel/legalizer-info-validation.mir | 3 + .../GlobalISel/irtranslator/shufflevector.ll | 1774 +++++++++++++++++ .../MachineVerifier/test_g_splat_vector.mir | 27 + .../GlobalISel/LegalizerHelperTest.cpp | 4 +- .../CodeGen/GlobalISel/PatternMatchTest.cpp | 6 +- 15 files changed, 1890 insertions(+), 21 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll create mode 100644 llvm/test/MachineVerifier/test_g_splat_vector.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 33b0152bd7b49c..dda367607d0432 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -639,6 +639,11 @@ Concatenate two vectors and shuffle the elements according to the mask operand. The mask operand should be an IR Constant which exactly matches the corresponding mask for the IR shufflevector instruction. +G_SPLAT_VECTOR +^^^^^^^^^^^^^^^^ + +Create a vector where all elements are the scalar from the source operand. 
+ Vector Reduction Operations --------------------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 1387a0a37561c4..6762b1b360d5e8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1063,8 +1063,7 @@ class MachineIRBuilder { /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements - MachineInstrBuilder buildSplatVector(const DstOp &Res, - const SrcOp &Src); + MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src); /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ... /// @@ -1099,6 +1098,15 @@ class MachineIRBuilder { MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef Mask); + /// Build and insert \p Res = G_SPLAT_VECTOR \p Val + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with vector type. + /// \pre \p Val must be a generic virtual register with scalar type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val); + /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ... /// /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 6aded2ceebe13a..94fba491148b2e 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -736,6 +736,9 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) /// Generic shufflevector. HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) +/// Generic splatvector. +HANDLE_TARGET_OPCODE(G_SPLAT_VECTOR) + /// Generic count trailing zeroes. 
HANDLE_TARGET_OPCODE(G_CTTZ) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index d2036e478d18f2..d967885aa2d758 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1450,6 +1450,13 @@ def G_SHUFFLE_VECTOR: GenericInstruction { let hasSideEffects = false; } +// Generic splatvector. +def G_SPLAT_VECTOR: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$val); + let hasSideEffects = false; +} + //------------------------------------------------------------------------------ // Vector reductions //------------------------------------------------------------------------------ diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 64e2d517e3b9c4..1869e0d41a51f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -309,7 +309,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, // For vectors, CSE the element only for now. LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); @@ -336,7 +336,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, // For vectors, CSE the element only for now. 
LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7c986dbbc2c7c8..365870f540daeb 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1598,10 +1598,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // We might need to splat the base pointer into a vector if the offsets // are vectors. if (WantSplatVector && !PtrTy.isVector()) { - BaseReg = - MIRBuilder - .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) - .getReg(0); + BaseReg = MIRBuilder + .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy), + BaseReg) + .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); OffsetIRTy = DL->getIndexType(PtrIRTy); @@ -1639,8 +1639,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { if (!IdxTy.isVector() && WantSplatVector) { - IdxReg = MIRBuilder.buildSplatVector( - OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); + IdxReg = MIRBuilder + .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy), + IdxReg) + .getReg(0); } IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); @@ -2997,6 +2999,19 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + // A ShuffleVector that has operates on scalable vectors is a splat vector + // where the value of the splat vector is the 0th element of the first + // operand, since the index mask operand is the zeroinitializer (undef and + // poison are treated as zeroinitializer here). 
+ if (U.getOperand(0)->getType()->isScalableTy()) { + Value *Op0 = U.getOperand(0); + auto SplatVal = MIRBuilder.buildExtractVectorElementConstant( + LLT::scalar(Op0->getType()->getScalarSizeInBits()), + getOrCreateVReg(*Op0), 0); + MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal); + return true; + } + ArrayRef Mask; if (auto *SVI = dyn_cast(&U)) Mask = SVI->getShuffleMask(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d016e684c48f6..2ec47f72aca39a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8391,7 +8391,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { // For vector types create a G_BUILD_VECTOR. if (Ty.isVector()) - Val = MIB.buildSplatVector(Ty, Val).getReg(0); + Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0); return Val; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index cdd605a5221ad8..a5a136e2effc60 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -326,7 +326,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, auto Const = buildInstr(TargetOpcode::G_CONSTANT) .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addCImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_CONSTANT); @@ -363,7 +363,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addFPImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); @@ -711,8 +711,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } 
-MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, - const SrcOp &Src) { +MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res, + const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } @@ -742,6 +742,14 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); } +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { + LLT DstTy = Res.getLLTTy(*getMRI()); + assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + "Expected Src to match Dst elt ty"); + return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); +} + MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1d0757c5d7f5f5..ecb3bd33bdfd49 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1640,6 +1640,24 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + + case TargetOpcode::G_SPLAT_VECTOR: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + + if (!DstTy.isScalableVector()) + report("Destination type must be a scalable vector", MI); + + if (!SrcTy.isScalar()) + report("Source type must be a scalar", MI); + + if (DstTy.getScalarType() != SrcTy) + report("Element type of the destination must be the same type as the " + "source type", + MI); + + break; + } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 750d70c03eabd7..4713bd605c243b 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -20920,7 +20920,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { unsigned Op = Inst.getOpcode(); if (Op == Instruction::Add || Op == Instruction::Sub || Op == Instruction::And || Op == Instruction::Or || - Op == Instruction::Xor || Op == Instruction::InsertElement) + Op == Instruction::Xor || Op == Instruction::InsertElement || + Op == Instruction::Xor || Op == Instruction::ShuffleVector) return false; if (Inst.getType()->isScalableTy()) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d87704cf45d5d5..ecad3f11513487 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -625,6 +625,9 @@ # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_SPLAT_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll new file mode 100644 index 00000000000000..df7778899b0d09 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll @@ -0,0 +1,1774 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s + +define @shufflevector_nxv1i1_0() { + ; RV32-LABEL: name: shufflevector_nxv1i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i1_1() { + ; RV32-LABEL: name: shufflevector_nxv1i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT 
[[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i1_0() { + ; RV32-LABEL: name: shufflevector_nxv2i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), 
[[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i1_1() { + ; RV32-LABEL: name: shufflevector_nxv2i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i1_0() { + ; RV32-LABEL: name: shufflevector_nxv4i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i1_1() { + ; RV32-LABEL: name: shufflevector_nxv4i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY 
[[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i1_0() { + ; RV32-LABEL: name: shufflevector_nxv8i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY 
[[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i1_1() { + ; RV32-LABEL: name: shufflevector_nxv8i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; 
RV64-LABEL: name: shufflevector_nxv8i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i1_0() { + ; RV32-LABEL: name: shufflevector_nxv16i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i1_1() { + ; RV32-LABEL: name: shufflevector_nxv16i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_1 + ; RV64: bb.1 
(%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i8_0() { + ; RV32-LABEL: name: shufflevector_nxv1i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_0 + ; RV64: bb.1 (%ir-block.0): 
+ ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i8_1() { + ; RV32-LABEL: name: shufflevector_nxv1i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i8_0() { + ; RV32-LABEL: name: shufflevector_nxv2i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i8_1() { + ; RV32-LABEL: name: shufflevector_nxv2i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i8_0() { + ; RV32-LABEL: name: shufflevector_nxv4i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i8_1() { + ; RV32-LABEL: name: shufflevector_nxv4i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = 
G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i8_0() { + ; RV32-LABEL: name: shufflevector_nxv8i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i8_1() { + ; RV32-LABEL: name: shufflevector_nxv8i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i8_0() { + ; RV32-LABEL: name: shufflevector_nxv16i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i8_1() { + ; RV32-LABEL: name: shufflevector_nxv16i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i16_0() { + ; RV32-LABEL: name: shufflevector_nxv1i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i16_1() { + ; RV32-LABEL: name: shufflevector_nxv1i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: 
PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i16_0() { + ; RV32-LABEL: name: shufflevector_nxv2i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: 
PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i16_1() { + ; RV32-LABEL: name: shufflevector_nxv2i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET 
implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i16_0() { + ; RV32-LABEL: name: shufflevector_nxv4i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i16_1() { + ; RV32-LABEL: name: shufflevector_nxv4i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define 
@shufflevector_nxv4i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i16_0() { + ; RV32-LABEL: name: shufflevector_nxv8i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + 
+define @shufflevector_nxv8i16_1() { + ; RV32-LABEL: name: shufflevector_nxv8i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, 
zeroinitializer + ret %b +} + +define @shufflevector_nxv16i16_0() { + ; RV32-LABEL: name: shufflevector_nxv16i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i16_1() { + ; RV32-LABEL: name: shufflevector_nxv16i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i16_2( %a) { + ; 
RV32-LABEL: name: shufflevector_nxv16i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i32_0() { + ; RV32-LABEL: name: shufflevector_nxv1i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define 
@shufflevector_nxv1i32_1() { + ; RV32-LABEL: name: shufflevector_nxv1i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define 
@shufflevector_nxv2i32_0() { + ; RV32-LABEL: name: shufflevector_nxv2i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i32_1() { + ; RV32-LABEL: name: shufflevector_nxv2i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i32_2 + ; RV32: bb.1 
(%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i32_0() { + ; RV32-LABEL: name: shufflevector_nxv4i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i32_1() { + ; RV32-LABEL: name: shufflevector_nxv4i32_1 + ; RV32: 
bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i32_0() { + ; RV32-LABEL: name: 
shufflevector_nxv8i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i32_1() { + ; RV32-LABEL: name: shufflevector_nxv8i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + 
; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i32_0() { + ; RV32-LABEL: name: shufflevector_nxv16i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i32_1() { + ; RV32-LABEL: name: shufflevector_nxv16i32_1 + ; RV32: bb.1 (%ir-block.0): + ; 
RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i64_0() { + ; RV32-LABEL: name: shufflevector_nxv1i64_0 + ; RV32: 
bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i64_1() { + ; RV32-LABEL: name: shufflevector_nxv1i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: 
[[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i64_0() { + ; RV32-LABEL: name: shufflevector_nxv2i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i64_1() { + ; RV32-LABEL: name: shufflevector_nxv2i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; 
RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i64_0() { + ; RV32-LABEL: name: shufflevector_nxv4i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() 
= G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i64_1() { + ; RV32-LABEL: name: shufflevector_nxv4i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i64_0() { + ; RV32-LABEL: name: shufflevector_nxv8i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i64_1() { + ; RV32-LABEL: name: shufflevector_nxv8i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i64_0() { + ; RV32-LABEL: name: shufflevector_nxv16i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i64_1() { + ; RV32-LABEL: name: shufflevector_nxv16i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8, $v16m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV32-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8, $v16m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV64-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + + + diff --git 
a/llvm/test/MachineVerifier/test_g_splat_vector.mir b/llvm/test/MachineVerifier/test_g_splat_vector.mir new file mode 100644 index 00000000000000..0d1d8a3e6dcc64 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_splat_vector.mir @@ -0,0 +1,27 @@ +# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: g_splat_vector +tracksRegLiveness: true +liveins: +body: | + bb.0: + %0:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s32>) = G_IMPLICIT_DEF + %2:_() = G_IMPLICIT_DEF + + ; CHECK: Destination type must be a scalable vector + %3:_(s32) = G_SPLAT_VECTOR %0 + + ; CHECK: Destination type must be a scalable vector + %4:_(<2 x s32>) = G_SPLAT_VECTOR %0 + + ; CHECK: Source type must be a scalar + %5:_() = G_SPLAT_VECTOR %1 + + ; CHECK: Source type must be a scalar + %6:_() = G_SPLAT_VECTOR %2 + + ; CHECK: Element type of the destination must be the same type as the source type + %7:_() = G_SPLAT_VECTOR %0 +... 
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 73837279701a97..33155d2c9a9642 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -147,9 +147,9 @@ TEST_F(AArch64GISelMITest, LowerRotatesVector) { LLT S32 = LLT::scalar(32); LLT V4S32 = LLT::fixed_vector(4, S32); auto SrcTrunc = B.buildTrunc(S32, Copies[0]); - auto Src = B.buildSplatVector(V4S32, SrcTrunc); + auto Src = B.buildSplatBuildVector(V4S32, SrcTrunc); auto AmtTrunc = B.buildTrunc(S32, Copies[1]); - auto Amt = B.buildSplatVector(V4S32, AmtTrunc); + auto Amt = B.buildSplatBuildVector(V4S32, AmtTrunc); auto ROTR = B.buildInstr(TargetOpcode::G_ROTR, {V4S32}, {Src, Amt}); AInfo Info(MF->getSubtarget()); diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index f52e49df0bcdee..59a86fa5646f36 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -61,7 +61,7 @@ TEST_F(AArch64GISelMITest, MatchIntConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); int64_t Cst; EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_ICstOrSplat(Cst))); EXPECT_EQ(Cst, 42); @@ -625,7 +625,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_SpecificICstSplat(42))); @@ -655,7 +655,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantOrSplat) { LLT v4s64 = 
LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE( From 3714f937b835c06c8c32ca4f3f61ba2317db2296 Mon Sep 17 00:00:00 2001 From: Edgar Date: Thu, 7 Mar 2024 18:10:46 +0100 Subject: [PATCH 058/158] [MLIR] Add llvm (debug) attributes to CAPI (#83992) This PR adds the following to the mlir c api: - The disctinct mlir builtin attribute. - LLVM attributes (mostly debug related ones) --- mlir/include/mlir-c/BuiltinAttributes.h | 4 + mlir/include/mlir-c/Dialect/LLVM.h | 231 ++++++++++++++++++++++++ mlir/lib/CAPI/Dialect/LLVM.cpp | 207 +++++++++++++++++++++ mlir/lib/CAPI/IR/BuiltinAttributes.cpp | 4 + mlir/test/CAPI/ir.c | 4 + mlir/test/CAPI/llvm.c | 113 +++++++++++- 6 files changed, 562 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir-c/BuiltinAttributes.h b/mlir/include/mlir-c/BuiltinAttributes.h index 01d1b6008f5e21..231eb83b5e2694 100644 --- a/mlir/include/mlir-c/BuiltinAttributes.h +++ b/mlir/include/mlir-c/BuiltinAttributes.h @@ -266,6 +266,10 @@ mlirSymbolRefAttrGetNestedReference(MlirAttribute attr, intptr_t pos); /// Returns the typeID of an SymbolRef attribute. MLIR_CAPI_EXPORTED MlirTypeID mlirSymbolRefAttrGetTypeID(void); +/// Creates a DisctinctAttr with the referenced attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirDisctinctAttrCreate(MlirAttribute referencedAttr); + //===----------------------------------------------------------------------===// // Flat SymbolRef attribute. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index ac216b01f364d4..d823afb659c8db 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -11,6 +11,7 @@ #define MLIR_C_DIALECT_LLVM_H #include "mlir-c/IR.h" +#include "mlir-c/Support.h" #ifdef __cplusplus extern "C" { @@ -98,6 +99,236 @@ MLIR_CAPI_EXPORTED MlirLogicalResult mlirLLVMStructTypeSetBody(MlirType structType, intptr_t nFieldTypes, MlirType const *fieldTypes, bool isPacked); +enum MlirLLVMCConv { + MlirLLVMCConvC = 0, + MlirLLVMCConvFast = 8, + MlirLLVMCConvCold = 9, + MlirLLVMCConvGHC = 10, + MlirLLVMCConvHiPE = 11, + MlirLLVMCConvAnyReg = 13, + MlirLLVMCConvPreserveMost = 14, + MlirLLVMCConvPreserveAll = 15, + MlirLLVMCConvSwift = 16, + MlirLLVMCConvCXX_FAST_TLS = 17, + MlirLLVMCConvTail = 18, + MlirLLVMCConvCFGuard_Check = 19, + MlirLLVMCConvSwiftTail = 20, + MlirLLVMCConvX86_StdCall = 64, + MlirLLVMCConvX86_FastCall = 65, + MlirLLVMCConvARM_APCS = 66, + MlirLLVMCConvARM_AAPCS = 67, + MlirLLVMCConvARM_AAPCS_VFP = 68, + MlirLLVMCConvMSP430_INTR = 69, + MlirLLVMCConvX86_ThisCall = 70, + MlirLLVMCConvPTX_Kernel = 71, + MlirLLVMCConvPTX_Device = 72, + MlirLLVMCConvSPIR_FUNC = 75, + MlirLLVMCConvSPIR_KERNEL = 76, + MlirLLVMCConvIntel_OCL_BI = 77, + MlirLLVMCConvX86_64_SysV = 78, + MlirLLVMCConvWin64 = 79, + MlirLLVMCConvX86_VectorCall = 80, + MlirLLVMCConvDUMMY_HHVM = 81, + MlirLLVMCConvDUMMY_HHVM_C = 82, + MlirLLVMCConvX86_INTR = 83, + MlirLLVMCConvAVR_INTR = 84, + MlirLLVMCConvAVR_BUILTIN = 86, + MlirLLVMCConvAMDGPU_VS = 87, + MlirLLVMCConvAMDGPU_GS = 88, + MlirLLVMCConvAMDGPU_CS = 90, + MlirLLVMCConvAMDGPU_KERNEL = 91, + MlirLLVMCConvX86_RegCall = 92, + MlirLLVMCConvAMDGPU_HS = 93, + MlirLLVMCConvMSP430_BUILTIN = 94, + MlirLLVMCConvAMDGPU_LS = 95, + MlirLLVMCConvAMDGPU_ES = 96, + MlirLLVMCConvAArch64_VectorCall = 97, + 
MlirLLVMCConvAArch64_SVE_VectorCall = 98, + MlirLLVMCConvWASM_EmscriptenInvoke = 99, + MlirLLVMCConvAMDGPU_Gfx = 100, + MlirLLVMCConvM68k_INTR = 101, +}; +typedef enum MlirLLVMCConv MlirLLVMCConv; + +/// Creates a LLVM CConv attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMCConvAttrGet(MlirContext ctx, + MlirLLVMCConv cconv); + +enum MlirLLVMComdat { + MlirLLVMComdatAny = 0, + MlirLLVMComdatExactMatch = 1, + MlirLLVMComdatLargest = 2, + MlirLLVMComdatNoDeduplicate = 3, + MlirLLVMComdatSameSize = 4, +}; +typedef enum MlirLLVMComdat MlirLLVMComdat; + +/// Creates a LLVM Comdat attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMComdatAttrGet(MlirContext ctx, + MlirLLVMComdat comdat); + +enum MlirLLVMLinkage { + MlirLLVMLinkagePrivate = 0, + MlirLLVMLinkageInternal = 1, + MlirLLVMLinkageAvailableExternally = 2, + MlirLLVMLinkageLinkonce = 3, + MlirLLVMLinkageWeak = 4, + MlirLLVMLinkageCommon = 5, + MlirLLVMLinkageAppending = 6, + MlirLLVMLinkageExternWeak = 7, + MlirLLVMLinkageLinkonceODR = 8, + MlirLLVMLinkageWeakODR = 9, + MlirLLVMLinkageExternal = 10, +}; +typedef enum MlirLLVMLinkage MlirLLVMLinkage; + +/// Creates a LLVM Linkage attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMLinkageAttrGet(MlirContext ctx, MlirLLVMLinkage linkage); + +/// Creates a LLVM DINullType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDINullTypeAttrGet(MlirContext ctx); + +/// Creates a LLVM DIExpressionElem attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIExpressionElemAttrGet(MlirContext ctx, unsigned int opcode, + intptr_t nArguments, uint64_t const *arguments); + +/// Creates a LLVM DIExpression attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIExpressionAttrGet( + MlirContext ctx, intptr_t nOperations, MlirAttribute const *operations); + +enum MlirLLVMTypeEncoding { + MlirLLVMTypeEncodingAddress = 0x1, + MlirLLVMTypeEncodingBoolean = 0x2, + MlirLLVMTypeEncodingComplexFloat = 0x31, + MlirLLVMTypeEncodingFloatT = 0x4, + MlirLLVMTypeEncodingSigned = 0x5, + MlirLLVMTypeEncodingSignedChar = 0x6, + MlirLLVMTypeEncodingUnsigned = 0x7, + MlirLLVMTypeEncodingUnsignedChar = 0x08, + MlirLLVMTypeEncodingImaginaryFloat = 0x09, + MlirLLVMTypeEncodingPackedDecimal = 0x0a, + MlirLLVMTypeEncodingNumericString = 0x0b, + MlirLLVMTypeEncodingEdited = 0x0c, + MlirLLVMTypeEncodingSignedFixed = 0x0d, + MlirLLVMTypeEncodingUnsignedFixed = 0x0e, + MlirLLVMTypeEncodingDecimalFloat = 0x0f, + MlirLLVMTypeEncodingUTF = 0x10, + MlirLLVMTypeEncodingUCS = 0x11, + MlirLLVMTypeEncodingASCII = 0x12, + MlirLLVMTypeEncodingLoUser = 0x80, + MlirLLVMTypeEncodingHiUser = 0xff, +}; +typedef enum MlirLLVMTypeEncoding MlirLLVMTypeEncoding; + +/// Creates a LLVM DIBasicType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIBasicTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, uint64_t sizeInBits, + MlirLLVMTypeEncoding encoding); + +/// Creates a LLVM DICompositeType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompositeTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file, + uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags, + uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements, + MlirAttribute const *elements); + +/// Creates a LLVM DIDerivedType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIDerivedTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, + MlirAttribute baseType, uint64_t sizeInBits, uint32_t alignInBits, + uint64_t offsetInBits); + +/// Gets the base type from a LLVM DIDerivedType attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIDerivedTypeAttrGetBaseType(MlirAttribute diDerivedType); + +/// Creates a LLVM DIFileAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIFileAttrGet(MlirContext ctx, + MlirAttribute name, + MlirAttribute directory); + +enum MlirLLVMDIEmissionKind { + MlirLLVMDIEmissionKindNone = 0, + MlirLLVMDIEmissionKindFull = 1, + MlirLLVMDIEmissionKindLineTablesOnly = 2, + MlirLLVMDIEmissionKindDebugDirectivesOnly = 3, +}; +typedef enum MlirLLVMDIEmissionKind MlirLLVMDIEmissionKind; + +/// Creates a LLVM DICompileUnit attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompileUnitAttrGet( + MlirContext ctx, MlirAttribute id, unsigned int sourceLanguage, + MlirAttribute file, MlirAttribute producer, bool isOptimized, + MlirLLVMDIEmissionKind emissionKind); + +/// Creates a LLVM DIFlags attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, + uint64_t value); + +/// Creates a LLVM DILexicalBlock attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILexicalBlockAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute file, unsigned int line, + unsigned int column); + +/// Creates a LLVM DILexicalBlockFile attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILexicalBlockFileAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute file, + unsigned int discriminator); + +/// Creates a LLVM DILocalVariableAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILocalVariableAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute name, + MlirAttribute diFile, unsigned int line, unsigned int arg, + unsigned int alignInBits, MlirAttribute diType); + +/// Creates a LLVM DISubprogramAttr attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDISubprogramAttrGet( + MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, + MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, + MlirAttribute file, unsigned int line, unsigned int scopeLine, + uint64_t subprogramFlags, MlirAttribute type); + +/// Gets the scope from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetScope(MlirAttribute diSubprogram); + +/// Gets the line from this DISubprogramAttr. +MLIR_CAPI_EXPORTED unsigned int +mlirLLVMDISubprogramAttrGetLine(MlirAttribute diSubprogram); + +/// Gets the scope line from this DISubprogram. +MLIR_CAPI_EXPORTED unsigned int +mlirLLVMDISubprogramAttrGetScopeLine(MlirAttribute diSubprogram); + +/// Gets the compile unit from this DISubprogram. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetCompileUnit(MlirAttribute diSubprogram); + +/// Gets the file from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetFile(MlirAttribute diSubprogram); + +/// Gets the type from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetType(MlirAttribute diSubprogram); + +/// Creates a LLVM DISubroutineTypeAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubroutineTypeAttrGet(MlirContext ctx, unsigned int callingConvention, + intptr_t nTypes, MlirAttribute const *types); + +/// Creates a LLVM DIModuleAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIModuleAttrGet( + MlirContext ctx, MlirAttribute file, MlirAttribute scope, + MlirAttribute name, MlirAttribute configMacros, MlirAttribute includePath, + MlirAttribute apinotes, unsigned int line, bool isDecl); + +/// Gets the scope of this DIModuleAttr. 
+MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule); + #ifdef __cplusplus } #endif diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 642018a814ca12..2d938ce5f4834c 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -7,9 +7,16 @@ //===----------------------------------------------------------------------===// #include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" #include "mlir/CAPI/Registration.h" +#include "mlir/CAPI/Wrap.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "llvm-c/Core.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallVectorExtras.h" using namespace mlir; using namespace mlir::LLVM; @@ -110,3 +117,203 @@ MlirLogicalResult mlirLLVMStructTypeSetBody(MlirType structType, cast(unwrap(structType)) .setBody(unwrapList(nFieldTypes, fieldTypes, fields), isPacked)); } + +MlirAttribute mlirLLVMDIExpressionElemAttrGet(MlirContext ctx, + unsigned int opcode, + intptr_t nArguments, + uint64_t const *arguments) { + auto list = ArrayRef(arguments, nArguments); + return wrap(DIExpressionElemAttr::get(unwrap(ctx), opcode, list)); +} + +MlirAttribute mlirLLVMDIExpressionAttrGet(MlirContext ctx, intptr_t nOperations, + MlirAttribute const *operations) { + SmallVector attrStorage; + attrStorage.reserve(nOperations); + + return wrap(DIExpressionAttr::get( + unwrap(ctx), + llvm::map_to_vector( + unwrapList(nOperations, operations, attrStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDINullTypeAttrGet(MlirContext ctx) { + return wrap(DINullTypeAttr::get(unwrap(ctx))); +} + +MlirAttribute mlirLLVMDIBasicTypeAttrGet(MlirContext ctx, unsigned int tag, + MlirAttribute name, + uint64_t sizeInBits, + MlirLLVMTypeEncoding encoding) { + + return wrap(DIBasicTypeAttr::get( + unwrap(ctx), tag, cast(unwrap(name)), 
sizeInBits, encoding)); +} + +MlirAttribute mlirLLVMDICompositeTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file, + uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags, + uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements, + MlirAttribute const *elements) { + SmallVector elementsStorage; + elementsStorage.reserve(nElements); + + return wrap(DICompositeTypeAttr::get( + unwrap(ctx), tag, cast(unwrap(name)), + cast(unwrap(file)), line, cast(unwrap(scope)), + cast(unwrap(baseType)), DIFlags(flags), sizeInBits, + alignInBits, + llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDIDerivedTypeAttrGet(MlirContext ctx, unsigned int tag, + MlirAttribute name, + MlirAttribute baseType, + uint64_t sizeInBits, + uint32_t alignInBits, + uint64_t offsetInBits) { + return wrap(DIDerivedTypeAttr::get(unwrap(ctx), tag, + cast(unwrap(name)), + cast(unwrap(baseType)), + sizeInBits, alignInBits, offsetInBits)); +} + +MlirAttribute +mlirLLVMDIDerivedTypeAttrGetBaseType(MlirAttribute diDerivedType) { + return wrap(cast(unwrap(diDerivedType)).getBaseType()); +} + +MlirAttribute mlirLLVMCConvAttrGet(MlirContext ctx, MlirLLVMCConv cconv) { + return wrap(CConvAttr::get(unwrap(ctx), CConv(cconv))); +} + +MlirAttribute mlirLLVMComdatAttrGet(MlirContext ctx, MlirLLVMComdat comdat) { + return wrap(ComdatAttr::get(unwrap(ctx), comdat::Comdat(comdat))); +} + +MlirAttribute mlirLLVMLinkageAttrGet(MlirContext ctx, MlirLLVMLinkage linkage) { + return wrap(LinkageAttr::get(unwrap(ctx), linkage::Linkage(linkage))); +} + +MlirAttribute mlirLLVMDIFileAttrGet(MlirContext ctx, MlirAttribute name, + MlirAttribute directory) { + return wrap(DIFileAttr::get(unwrap(ctx), cast(unwrap(name)), + cast(unwrap(directory)))); +} + +MlirAttribute +mlirLLVMDICompileUnitAttrGet(MlirContext ctx, MlirAttribute id, + unsigned int sourceLanguage, MlirAttribute 
file, + MlirAttribute producer, bool isOptimized, + MlirLLVMDIEmissionKind emissionKind) { + return wrap(DICompileUnitAttr::get( + unwrap(ctx), cast(unwrap(id)), sourceLanguage, + cast(unwrap(file)), cast(unwrap(producer)), + isOptimized, DIEmissionKind(emissionKind))); +} + +MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, uint64_t value) { + return wrap(DIFlagsAttr::get(unwrap(ctx), DIFlags(value))); +} + +MlirAttribute mlirLLVMDILexicalBlockAttrGet(MlirContext ctx, + MlirAttribute scope, + MlirAttribute file, + unsigned int line, + unsigned int column) { + return wrap( + DILexicalBlockAttr::get(unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(file)), line, column)); +} + +MlirAttribute mlirLLVMDILexicalBlockFileAttrGet(MlirContext ctx, + MlirAttribute scope, + MlirAttribute file, + unsigned int discriminator) { + return wrap(DILexicalBlockFileAttr::get( + unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(file)), discriminator)); +} + +MlirAttribute +mlirLLVMDILocalVariableAttrGet(MlirContext ctx, MlirAttribute scope, + MlirAttribute name, MlirAttribute diFile, + unsigned int line, unsigned int arg, + unsigned int alignInBits, MlirAttribute diType) { + return wrap(DILocalVariableAttr::get( + unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(name)), cast(unwrap(diFile)), line, + arg, alignInBits, cast(unwrap(diType)))); +} + +MlirAttribute mlirLLVMDISubroutineTypeAttrGet(MlirContext ctx, + unsigned int callingConvention, + intptr_t nTypes, + MlirAttribute const *types) { + SmallVector attrStorage; + attrStorage.reserve(nTypes); + + return wrap(DISubroutineTypeAttr::get( + unwrap(ctx), callingConvention, + llvm::map_to_vector(unwrapList(nTypes, types, attrStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDISubprogramAttrGet( + MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, + MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, + MlirAttribute file, unsigned int line, unsigned int scopeLine, + uint64_t 
subprogramFlags, MlirAttribute type) { + return wrap(DISubprogramAttr::get( + unwrap(ctx), cast(unwrap(id)), + cast(unwrap(compileUnit)), + cast(unwrap(scope)), cast(unwrap(name)), + cast(unwrap(linkageName)), cast(unwrap(file)), + line, scopeLine, DISubprogramFlags(subprogramFlags), + cast(unwrap(type)))); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetScope(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getScope()); +} + +unsigned int mlirLLVMDISubprogramAttrGetLine(MlirAttribute diSubprogram) { + return cast(unwrap(diSubprogram)).getLine(); +} + +unsigned int mlirLLVMDISubprogramAttrGetScopeLine(MlirAttribute diSubprogram) { + return cast(unwrap(diSubprogram)).getScopeLine(); +} + +MlirAttribute +mlirLLVMDISubprogramAttrGetCompileUnit(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getCompileUnit()); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetFile(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getFile()); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetType(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getType()); +} + +MlirAttribute mlirLLVMDIModuleAttrGet(MlirContext ctx, MlirAttribute file, + MlirAttribute scope, MlirAttribute name, + MlirAttribute configMacros, + MlirAttribute includePath, + MlirAttribute apinotes, unsigned int line, + bool isDecl) { + return wrap(DIModuleAttr::get( + unwrap(ctx), cast(unwrap(file)), + cast(unwrap(scope)), cast(unwrap(name)), + cast(unwrap(configMacros)), + cast(unwrap(includePath)), cast(unwrap(apinotes)), + line, isDecl)); +} + +MlirAttribute mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule) { + return wrap(cast(unwrap(diModule)).getScope()); +} diff --git a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp index b3066ee0c28bdc..726af884668b2d 100644 --- a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp @@ -289,6 +289,10 @@ MlirTypeID 
mlirSymbolRefAttrGetTypeID(void) { return wrap(SymbolRefAttr::getTypeID()); } +MlirAttribute mlirDisctinctAttrCreate(MlirAttribute referencedAttr) { + return wrap(mlir::DistinctAttr::create(unwrap(referencedAttr))); +} + //===----------------------------------------------------------------------===// // Flat SymbolRef attribute. //===----------------------------------------------------------------------===// diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index a9850c0a132e75..8e79338c57a22a 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -1482,6 +1482,10 @@ int printAffineMap(MlirContext ctx) { // CHECK: (d0, d1, d2) -> (d0) // CHECK: (d0, d1, d2) -> (d2) + // CHECK: distinct[0]<"foo"> + mlirAttributeDump(mlirDisctinctAttrCreate( + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo")))); + return 0; } diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index 1817988dd67dd6..2fd98b29f487c8 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -10,9 +10,12 @@ // RUN: mlir-capi-llvm-test 2>&1 | FileCheck %s #include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/BuiltinAttributes.h" #include "mlir-c/BuiltinTypes.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" +#include "llvm-c/Core.h" +#include "llvm-c/DebugInfo.h" #include #include @@ -77,7 +80,7 @@ static void testTypeCreation(MlirContext ctx) { // CHECK-LABEL: testStructTypeCreation static int testStructTypeCreation(MlirContext ctx) { - fprintf(stderr, "testStructTypeCreation"); + fprintf(stderr, "testStructTypeCreation\n"); // CHECK: !llvm.struct<()> mlirTypeDump(mlirLLVMStructTypeLiteralGet(ctx, /*nFieldTypes=*/0, @@ -225,12 +228,120 @@ static int testStructTypeCreation(MlirContext ctx) { return 0; } +// CHECK-LABEL: testLLVMAttributes +static void testLLVMAttributes(MlirContext ctx) { + fprintf(stderr, "testLLVMAttributes\n"); + + // CHECK: #llvm.linkage + mlirAttributeDump(mlirLLVMLinkageAttrGet(ctx, MlirLLVMLinkageInternal)); + // CHECK: #llvm.cconv + 
mlirAttributeDump(mlirLLVMCConvAttrGet(ctx, MlirLLVMCConvC)); + // CHECK: #llvm + mlirAttributeDump(mlirLLVMComdatAttrGet(ctx, MlirLLVMComdatAny)); +} + +// CHECK-LABEL: testDebugInfoAttributes +static void testDebugInfoAttributes(MlirContext ctx) { + fprintf(stderr, "testDebugInfoAttributes\n"); + + MlirAttribute foo = + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo")); + MlirAttribute bar = + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("bar")); + MlirAttribute id = mlirDisctinctAttrCreate(foo); + + // CHECK: #llvm.di_null_type + mlirAttributeDump(mlirLLVMDINullTypeAttrGet(ctx)); + + // CHECK: #llvm.di_basic_type + MlirAttribute di_type = + mlirLLVMDIBasicTypeAttrGet(ctx, 0, foo, 64, MlirLLVMTypeEncodingSigned); + mlirAttributeDump(di_type); + + MlirAttribute file = mlirLLVMDIFileAttrGet(ctx, foo, bar); + + // CHECK: #llvm.di_file<"foo" in "bar"> + mlirAttributeDump(file); + + MlirAttribute compile_unit = + mlirLLVMDICompileUnitAttrGet(ctx, id, LLVMDWARFSourceLanguageC99, file, + foo, false, MlirLLVMDIEmissionKindFull); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(compile_unit); + + MlirAttribute di_module = mlirLLVMDIModuleAttrGet( + ctx, file, compile_unit, foo, + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("")), bar, foo, 1, + 0); + // CHECK: #llvm.di_module<{{.*}}> + mlirAttributeDump(di_module); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(mlirLLVMDIModuleAttrGetScope(di_module)); + + // CHECK: 1 : i32 + mlirAttributeDump(mlirLLVMDIFlagsAttrGet(ctx, 0x1)); + + // CHECK: #llvm.di_lexical_block<{{.*}}> + mlirAttributeDump( + mlirLLVMDILexicalBlockAttrGet(ctx, compile_unit, file, 1, 2)); + + // CHECK: #llvm.di_lexical_block_file<{{.*}}> + mlirAttributeDump( + mlirLLVMDILexicalBlockFileAttrGet(ctx, compile_unit, file, 3)); + + // CHECK: #llvm.di_local_variable<{{.*}}> + mlirAttributeDump(mlirLLVMDILocalVariableAttrGet(ctx, compile_unit, foo, file, + 1, 0, 8, di_type)); + // CHECK: 
#llvm.di_derived_type<{{.*}}> + mlirAttributeDump( + mlirLLVMDIDerivedTypeAttrGet(ctx, 0, bar, di_type, 64, 8, 0)); + + // CHECK: #llvm.di_composite_type<{{.*}}> + mlirAttributeDump(mlirLLVMDICompositeTypeAttrGet( + ctx, 0, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, &di_type)); + + MlirAttribute subroutine_type = + mlirLLVMDISubroutineTypeAttrGet(ctx, 0x0, 1, &di_type); + + // CHECK: #llvm.di_subroutine_type<{{.*}}> + mlirAttributeDump(subroutine_type); + + MlirAttribute di_subprogram = + mlirLLVMDISubprogramAttrGet(ctx, id, compile_unit, compile_unit, foo, bar, + file, 1, 2, 0, subroutine_type); + // CHECK: #llvm.di_subprogram<{{.*}}> + mlirAttributeDump(di_subprogram); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetScope(di_subprogram)); + + // CHECK: #llvm.di_file<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetFile(di_subprogram)); + + // CHECK: #llvm.di_subroutine_type<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetType(di_subprogram)); + + MlirAttribute expression_elem = + mlirLLVMDIExpressionElemAttrGet(ctx, 1, 1, &(uint64_t){1}); + + // CHECK: #llvm + mlirAttributeDump(expression_elem); + + // CHECK: #llvm.di_expression<[(1)]> + mlirAttributeDump(mlirLLVMDIExpressionAttrGet(ctx, 1, &expression_elem)); +} + int main(void) { MlirContext ctx = mlirContextCreate(); mlirDialectHandleRegisterDialect(mlirGetDialectHandle__llvm__(), ctx); mlirContextGetOrLoadDialect(ctx, mlirStringRefCreateFromCString("llvm")); testTypeCreation(ctx); int result = testStructTypeCreation(ctx); + testLLVMAttributes(ctx); + testDebugInfoAttributes(ctx); mlirContextDestroy(ctx); if (result) fprintf(stderr, "FAILED: code %d", result); From 6157538d9e4eea135fb863b972c577f648c21641 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 7 Mar 2024 17:36:28 +0000 Subject: [PATCH 059/158] [InstCombine] ptrmask of gep for dynamic pointer aligment (#80002) Targets the dynamic realignment pattern of `(Ptr + Align - 1) & 
-Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction. --- .../InstCombineSimplifyDemanded.cpp | 38 ++++ llvm/test/Transforms/InstCombine/ptrmask.ll | 164 +++++++++++++++++- 2 files changed, 198 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 1b963a7de4a8ae..c691c8b1c55b30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -997,6 +997,44 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth))) return I; + // Combine: + // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) + // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) + // where only the low bits known to be zero in the pointer are changed + Value *InnerPtr; + uint64_t GEPIndex; + uint64_t PtrMaskImmediate; + if (match(I, m_Intrinsic( + m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)), + m_ConstantInt(PtrMaskImmediate)))) { + + LHSKnown = 
computeKnownBits(InnerPtr, Depth + 1, I); + if (!LHSKnown.isZero()) { + const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); + uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; + + uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; + uint64_t MaskedLowBitsGEPIndex = + GEPIndex & PointerAlignBits & PtrMaskImmediate; + + uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; + + if (MaskedGEPIndex != GEPIndex) { + auto *GEP = cast(II->getArgOperand(0)); + Builder.SetInsertPoint(I); + Type *GEPIndexType = + DL.getIndexType(GEP->getPointerOperand()->getType()); + Value *MaskedGEP = Builder.CreateGEP( + GEP->getSourceElementType(), InnerPtr, + ConstantInt::get(GEPIndexType, MaskedGEPIndex), + GEP->getName(), GEP->isInBounds()); + + replaceOperand(*I, 0, MaskedGEP); + return I; + } + } + } + break; } diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index afeb5d5251d0f4..4631b81cd1ce1f 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -80,12 +80,12 @@ define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr ad define ptr @ptrmask_combine_add_nonnull(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33 -; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32) +; CHECK-NEXT: [[PM0:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -60) +; CHECK-NEXT: [[PGEP1:%.*]] = getelementptr i8, ptr [[PM0]], i64 32 +; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP1]], i64 -32) ; CHECK-NEXT: ret ptr [[R]] ; - %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -60) %pgep = 
getelementptr i8, ptr %pm0, i64 33 %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16) ret ptr %r @@ -287,6 +287,162 @@ define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) { ret ptr addrspace(1) %r } +define ptr @ptrmask_is_nop0(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_is_nop0 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_is_nop1(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_is_nop1 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep0(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep0 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i8, ptr %p, i32 5 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep1(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep1 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %gep = getelementptr i8, ptr %p, i32 6 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep2(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep2 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 12 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 15 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep4(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep4 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24 +; CHECK-NEXT: 
[[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i8, ptr %p, i32 29 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep5(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep5 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 30 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep6(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep6 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 28 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep_indirect0(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect0 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; 44 from 4*sizeof(i32) + (31 & -4) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 44 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep0 = getelementptr i32, ptr %p, i32 4 + %gep1 = getelementptr i8, ptr %gep0, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep1, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep_indirect1(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect1 +; CHECK-SAME: (ptr [[P:%.*]]) { + +; CHECK-NEXT: [[R:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[R]], i64 32 +; CHECK-NEXT: ret ptr [[GEP]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16) + %pgep = getelementptr i8, ptr %pm0, i64 33 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16) + ret ptr %r +} + +define ptr 
@ptrmask_to_modified_gep_zero_argument() { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_zero_argument() { +; CHECK-NEXT: [[P:%.*]] = call nonnull align 4 ptr @llvm.ptrmask.p0.i64(ptr nonnull inttoptr (i64 31 to ptr), i64 28) +; CHECK-NEXT: ret ptr [[P]] +; + %gep = getelementptr inbounds i8, ptr null, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_preserves_inbounds(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_preserves_inbounds +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr inbounds i8, ptr %p, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_of_gep_requires_i8(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_of_gep_requires_i8 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i16, ptr %p, i32 5 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented +; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17 +; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[PM]] +; + %gep = getelementptr i8, <2 x ptr> %p, i32 17 + %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %gep, <2 x i64> ) + ret <2 x ptr> %pm +} + define ptr @ptrmask_is_useless0(i64 %i, i64 %m) { ; CHECK-LABEL: define ptr @ptrmask_is_useless0 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { From 
36f866c6ec3f6671fd4178ed4e49fd632a335cc2 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Thu, 7 Mar 2024 09:37:27 -0800 Subject: [PATCH 060/158] Fix vfork test strcmp buildbot failure (#84224) The buildbot seems to complain about `strcmp` function not available in the vfork patch (https://github.com/llvm/llvm-project/pull/81564): https://lab.llvm.org/buildbot/#/builders/68/builds/70093/steps/6/logs/stdio Unfortunately, I can't reproduce the failure on my linux machine so this is a guessing fix. If anyone has a way to reproduce and very this fix, please feel free to merge this change. Co-authored-by: jeffreytan81 --- lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp b/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp index 2f3a95dc5c6eef..d72051e4ee84d9 100644 --- a/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp +++ b/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include From ee24409c40ff35c3221892d9723331c233ca9f0e Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 7 Mar 2024 09:25:11 -0800 Subject: [PATCH 061/158] Revert "[AArch64][GlobalISel] Fix incorrect selection of monotonic s32->s64 anyext load." This reverts commit 7524ad9aa7b1b5003fe554a6ac8e434d50027dfb. Broke sanitizer build bots, e.g. 
https://lab.llvm.org/buildbot/#/builders/5/builds/41588/steps/9/logs/stdio --- .../GISel/AArch64InstructionSelector.cpp | 9 +++-- .../GlobalISel/select-atomic-load-store.mir | 33 +++---------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 48b73dced09ba0..6652883792391b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2997,14 +2997,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } } - if (IsZExtLoad || - (isa(LdSt) && ValTy == LLT::scalar(64) && MemSizeInBits == 32)) { - // The any/zextload from a smaller type to i32 should be handled by the + if (IsZExtLoad) { + // The zextload from a smaller type to i32 should be handled by the // importer. if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; - // If we have an extending load then change the load's type to be a - // narrower reg and zero_extend with SUBREG_TO_REG. + // If we have a ZEXTLOAD then change the load's type to be a narrower reg + // and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); Register DstReg = LoadStore->getOperand(0).getReg(); LoadStore->getOperand(0).setReg(LdReg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir index 6b4bbb85b2ec44..5787f914b965d3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir @@ -9,11 +9,6 @@ ret i8 %v } - define i32 @anyext_load_monotonic_i32() { - %v = load atomic i32, ptr null monotonic, align 4 - ret i32 %v - } - ... 
--- name: load_acq_i8 @@ -30,33 +25,13 @@ body: | ; CHECK-LABEL: name: load_acq_i8 ; CHECK: liveins: $x0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8) from %ir.ptr, align 8) - ; CHECK-NEXT: $w0 = COPY [[LDARB]] - ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8) from %ir.ptr, align 8) + ; CHECK: $w0 = COPY [[LDARB]] + ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 %2:gpr(s32) = G_LOAD %0(p0) :: (load acquire (s8) from %ir.ptr, align 8) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 ... ---- -name: anyext_load_monotonic_i32 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.1: - ; CHECK-LABEL: name: anyext_load_monotonic_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $xzr - ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load monotonic (s32) from `ptr null`) - ; CHECK-NEXT: %ld:gpr64all = SUBREG_TO_REG 0, [[LDRWui]], %subreg.sub_32 - ; CHECK-NEXT: $x0 = COPY %ld - ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %1:gpr(p0) = G_CONSTANT i64 0 - %ld:gpr(s64) = G_LOAD %1(p0) :: (load monotonic (s32) from `ptr null`) - $x0 = COPY %ld(s64) - RET_ReallyLR implicit $x0 - -... From e9901d8c94fdcd0d299d1abfdc8f0a5936aa7a50 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 11:44:31 -0600 Subject: [PATCH 062/158] [LinkerWrapper] Accept compression arguments for HIP fatbins (#84337) Summary: The HIP toolchain has support for compressing the final output. We should respect that when we create the executable. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++++ clang/test/Driver/linker-wrapper.c | 4 ++-- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 3 +++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e63e8a8e2e0e4b..fa17f6295d6ea7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8895,6 +8895,10 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, for (const char *LinkArg : LinkCommand->getArguments()) CmdArgs.push_back(LinkArg); + if (Args.hasFlag(options::OPT_offload_compress, + options::OPT_no_offload_compress, false)) + CmdArgs.push_back("--compress"); + const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper")); diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index c37f01189d0870..0e6fd80b429846 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -114,12 +114,12 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --compress \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908 // HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a -// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] 
-input=[[IMG_GFX908]] -output={{.*}}.hipfb +// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 0a783db8962ba7..7e6e289c50d872 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -405,6 +405,9 @@ fatbinary(ArrayRef> InputFiles, CmdArgs.push_back("-type=o"); CmdArgs.push_back("-bundle-align=4096"); + if (Args.hasArg(OPT_compress)) + CmdArgs.push_back("-compress"); + SmallVector Targets = {"-targets=host-x86_64-unknown-linux"}; for (const auto &[File, Arch] : InputFiles) Targets.push_back(Saver.save("hipv4-amdgcn-amd-amdhsa--" + Arch)); diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 2c6a788cf23a38..473fb19d922385 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -58,6 +58,8 @@ def print_wrapped_module : Flag<["--"], "print-wrapped-module">, HelpText<"Print the wrapped module's IR for testing">; def save_temps : Flag<["--"], "save-temps">, Flags<[WrapperOnlyOption]>, HelpText<"Save intermediate results">; +def compress : Flag<["--"], "compress">, + Flags<[WrapperOnlyOption]>, HelpText<"Compress bundled files">; def wrapper_time_trace_eq : Joined<["--"], "wrapper-time-trace=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, From ea49e04b35bc8e4bed7ee4db4074d201f780a15c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 7 Mar 2024 09:55:00 -0800 Subject: [PATCH 063/158] 
[lldb] Don't report all progress event as completed. (#84281) Currently, progress events reported by the ProgressManager and broadcast to eBroadcastBitProgressCategory always specify they're complete. The problem is that the ProgressManager reports kNonDeterministicTotal for both the total and the completed number of (sub)events. Because the values are the same, the event reports itself as complete. This patch fixes the issue by reporting 0 as the completed value for the start event and kNonDeterministicTotal for the end event. --- lldb/include/lldb/Core/Progress.h | 9 +++++++-- lldb/source/Core/Progress.cpp | 16 +++++++++------- lldb/unittests/Core/ProgressReportTest.cpp | 4 ++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lldb/include/lldb/Core/Progress.h b/lldb/include/lldb/Core/Progress.h index c6fc861fb71d86..c38f6dd0a140ed 100644 --- a/lldb/include/lldb/Core/Progress.h +++ b/lldb/include/lldb/Core/Progress.h @@ -148,9 +148,14 @@ class ProgressManager { static ProgressManager &Instance(); - static void ReportProgress(const Progress::ProgressData &); - private: + enum class EventType { + Begin, + End, + }; + static void ReportProgress(const Progress::ProgressData &progress_data, + EventType type); + llvm::StringMap> m_progress_category_map; std::mutex m_progress_map_mutex; diff --git a/lldb/source/Core/Progress.cpp b/lldb/source/Core/Progress.cpp index 9dcd7cf75ae057..b4b5e98b7ba493 100644 --- a/lldb/source/Core/Progress.cpp +++ b/lldb/source/Core/Progress.cpp @@ -97,7 +97,7 @@ void ProgressManager::Increment(const Progress::ProgressData &progress_data) { // initial progress report. 
if (!m_progress_category_map.contains(progress_data.title)) { m_progress_category_map[progress_data.title].second = progress_data; - ReportProgress(progress_data); + ReportProgress(progress_data, EventType::Begin); } m_progress_category_map[progress_data.title].first++; } @@ -110,7 +110,7 @@ void ProgressManager::Decrement(const Progress::ProgressData &progress_data) { return; if (pos->second.first <= 1) { - ReportProgress(pos->second.second); + ReportProgress(pos->second.second, EventType::End); m_progress_category_map.erase(progress_data.title); } else { --pos->second.first; @@ -118,12 +118,14 @@ void ProgressManager::Decrement(const Progress::ProgressData &progress_data) { } void ProgressManager::ReportProgress( - const Progress::ProgressData &progress_data) { + const Progress::ProgressData &progress_data, EventType type) { // The category bit only keeps track of when progress report categories have // started and ended, so clear the details and reset other fields when // broadcasting to it since that bit doesn't need that information. - Debugger::ReportProgress( - progress_data.progress_id, progress_data.title, "", - Progress::kNonDeterministicTotal, Progress::kNonDeterministicTotal, - progress_data.debugger_id, Debugger::eBroadcastBitProgressCategory); + const uint64_t completed = + (type == EventType::Begin) ? 
0 : Progress::kNonDeterministicTotal; + Debugger::ReportProgress(progress_data.progress_id, progress_data.title, "", + completed, Progress::kNonDeterministicTotal, + progress_data.debugger_id, + Debugger::eBroadcastBitProgressCategory); } diff --git a/lldb/unittests/Core/ProgressReportTest.cpp b/lldb/unittests/Core/ProgressReportTest.cpp index 98cbc475ce2835..e0253cbc4ec59b 100644 --- a/lldb/unittests/Core/ProgressReportTest.cpp +++ b/lldb/unittests/Core/ProgressReportTest.cpp @@ -168,7 +168,7 @@ TEST_F(ProgressReportTest, TestProgressManager) { ASSERT_EQ(data->GetDetails(), ""); ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); + ASSERT_FALSE(data->GetCompleted()); ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); ASSERT_EQ(data->GetMessage(), "Progress report 1"); @@ -199,7 +199,7 @@ TEST_F(ProgressReportTest, TestProgressManager) { ASSERT_EQ(data->GetDetails(), ""); ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); + ASSERT_FALSE(data->GetCompleted()); ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); ASSERT_EQ(data->GetMessage(), "Overlapping report 1"); From cfdfeb440cb2e25d1537616118a6c5509d96f2ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 7 Mar 2024 18:54:25 +0100 Subject: [PATCH 064/158] [clang][Interp][NFC] Remove unneeded forward declaration We already import Record.h. --- clang/lib/AST/Interp/Program.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/Interp/Program.h b/clang/lib/AST/Interp/Program.h index 045bf7ab7745b1..50bdb575e805cf 100644 --- a/clang/lib/AST/Interp/Program.h +++ b/clang/lib/AST/Interp/Program.h @@ -34,7 +34,6 @@ class VarDecl; namespace interp { class Context; -class Record; /// The program contains and links the bytecode for all functions. 
class Program final { From a435e1f63bbd8c6d0ff140ccc890c25787091490 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Thu, 7 Mar 2024 10:06:47 -0800 Subject: [PATCH 065/158] [acc] Add attribute for combined constructs (#80319) Combined constructs are decomposed into separate operations. However, this does not adhere to `acc` dialect's goal to be able to regenerate semantically equivalent clauses as user's intent. Thus, add an attribute to keep track of the combined constructs. --- mlir/include/mlir/Dialect/OpenACC/OpenACC.h | 4 ++ .../mlir/Dialect/OpenACC/OpenACCOps.td | 34 +++++++++++-- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 51 +++++++++++++++++++ mlir/test/Dialect/OpenACC/invalid.mlir | 40 +++++++++++++++ mlir/test/Dialect/OpenACC/ops.mlir | 46 +++++++++++++++-- 5 files changed, 168 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index bb3b9617c24edb..0c8e0b45878206 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -133,6 +133,10 @@ static constexpr StringLiteral getRoutineInfoAttrName() { return StringLiteral("acc.routine_info"); } +static constexpr StringLiteral getCombinedConstructsAttrName() { + return CombinedConstructsTypeAttr::name; +} + struct RuntimeCounters : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "AccRuntimeCounters"; } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 6da7a742bbed8c..b5ad46361fa698 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -218,6 +218,24 @@ def GangArgTypeArrayAttr : let constBuilderCall = ?; } +// Combined constructs enumerations +def OpenACC_KernelsLoop : I32EnumAttrCase<"KernelsLoop", 1, "kernels_loop">; +def OpenACC_ParallelLoop : I32EnumAttrCase<"ParallelLoop", 2, "parallel_loop">; +def 
OpenACC_SerialLoop : I32EnumAttrCase<"SerialLoop", 3, "serial_loop">; + +def OpenACC_CombinedConstructsType : I32EnumAttr<"CombinedConstructsType", + "Differentiate between combined constructs", + [OpenACC_KernelsLoop, OpenACC_ParallelLoop, OpenACC_SerialLoop]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::acc"; +} + +def OpenACC_CombinedConstructsAttr : EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + // Define a resource for the OpenACC runtime counters. def OpenACC_RuntimeCounters : Resource<"::mlir::acc::RuntimeCounters">; @@ -933,7 +951,8 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", Variadic:$gangFirstPrivateOperands, OptionalAttr:$firstprivatizations, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -993,6 +1012,7 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1068,7 +1088,8 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", Variadic:$gangFirstPrivateOperands, OptionalAttr:$firstprivatizations, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -1109,6 +1130,7 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? 
oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1182,7 +1204,8 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", Optional:$selfCond, UnitAttr:$selfAttr, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -1242,6 +1265,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1573,7 +1597,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", Variadic:$privateOperands, OptionalAttr:$privatizations, Variadic:$reductionOperands, - OptionalAttr:$reductionRecipes + OptionalAttr:$reductionRecipes, + OptionalAttr:$combined ); let results = (outs Variadic:$results); @@ -1665,6 +1690,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", let hasCustomAssemblyFormat = 1; let assemblyFormat = [{ + custom($combined) oilist( `gang` `` custom($gangOperands, type($gangOperands), $gangOperandsArgType, $gangOperandsDeviceType, diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 18187e7d4f66cd..c09a3403f9a3e3 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -1283,6 +1283,50 @@ static void printDeviceTypeOperandsWithKeywordOnly( p << ")"; } +static ParseResult +parseCombinedConstructsLoop(mlir::OpAsmParser &parser, + mlir::acc::CombinedConstructsTypeAttr &attr) { + if (succeeded(parser.parseOptionalKeyword("combined"))) { + if (parser.parseLParen()) + return failure(); + if (succeeded(parser.parseOptionalKeyword("kernels"))) { + attr = mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::KernelsLoop); + } else if (succeeded(parser.parseOptionalKeyword("parallel"))) { + attr = 
mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::ParallelLoop); + } else if (succeeded(parser.parseOptionalKeyword("serial"))) { + attr = mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::SerialLoop); + } else { + parser.emitError(parser.getCurrentLocation(), + "expected compute construct name"); + return failure(); + } + if (parser.parseRParen()) + return failure(); + } + return success(); +} + +static void +printCombinedConstructsLoop(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::acc::CombinedConstructsTypeAttr attr) { + if (attr) { + switch (attr.getValue()) { + case mlir::acc::CombinedConstructsType::KernelsLoop: + p << "combined(kernels)"; + break; + case mlir::acc::CombinedConstructsType::ParallelLoop: + p << "combined(parallel)"; + break; + case mlir::acc::CombinedConstructsType::SerialLoop: + p << "combined(serial)"; + break; + }; + } +} + //===----------------------------------------------------------------------===// // SerialOp //===----------------------------------------------------------------------===// @@ -1851,6 +1895,13 @@ LogicalResult acc::LoopOp::verify() { "reductions", false))) return failure(); + if (getCombined().has_value() && + (getCombined().value() != acc::CombinedConstructsType::ParallelLoop && + getCombined().value() != acc::CombinedConstructsType::KernelsLoop && + getCombined().value() != acc::CombinedConstructsType::SerialLoop)) { + return emitError("unexpected combined constructs attribute"); + } + // Check non-empty body(). 
if (getRegion().empty()) return emitError("expected non-empty body."); diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 70747b7e2acf4b..ec5430420524ce 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -738,3 +738,43 @@ func.func @acc_atomic_capture(%x: memref, %y: memref, %v: memref, acc.terminator } } + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected 'loop'}} + acc.parallel combined() { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected compute construct name}} + acc.loop combined(loop) { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected 'loop'}} + acc.parallel combined(parallel loop) { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected ')'}} + acc.loop combined(parallel loop) { + } + + return +} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 1739b3de3e65fd..2ef2178cb2b63a 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1846,9 +1846,49 @@ func.func @acc_atomic_capture(%v: memref, %x: memref, %expr: i32) { // ----- -%c2 = arith.constant 2 : i32 -%c1 = arith.constant 1 : i32 -acc.parallel num_gangs({%c2 : i32} [#acc.device_type], {%c1 : i32, %c1 : i32, %c1 : i32} [#acc.device_type]) { +// CHECK-LABEL: func.func @acc_num_gangs +func.func @acc_num_gangs() { + %c2 = arith.constant 2 : i32 + %c1 = arith.constant 1 : i32 + acc.parallel num_gangs({%c2 : i32} [#acc.device_type], {%c1 : i32, %c1 : i32, %c1 : i32} [#acc.device_type]) { + } + + return } // CHECK: acc.parallel num_gangs({%c2{{.*}} : i32} [#acc.device_type], {%c1{{.*}} : i32, %c1{{.*}} : i32, %c1{{.*}} : i32} [#acc.device_type]) + +// ----- + +// CHECK-LABEL: func.func @acc_combined +func.func @acc_combined() { + acc.parallel combined(loop) { + acc.loop 
combined(parallel) { + acc.yield + } + acc.terminator + } + + acc.kernels combined(loop) { + acc.loop combined(kernels) { + acc.yield + } + acc.terminator + } + + acc.serial combined(loop) { + acc.loop combined(serial) { + acc.yield + } + acc.terminator + } + + return +} + +// CHECK: acc.parallel combined(loop) +// CHECK: acc.loop combined(parallel) +// CHECK: acc.kernels combined(loop) +// CHECK: acc.loop combined(kernels) +// CHECK: acc.serial combined(loop) +// CHECK: acc.loop combined(serial) From 6515930b0cc4aa2e11e75728ef6cbeecbe5caec2 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 7 Mar 2024 10:13:14 -0800 Subject: [PATCH 066/158] [lldb] Minor cleanup in StoringDiagnosticConsumer (#84263) Removes an unused field. Retypes unshared smart pointers to `unique_ptr`. --- .../Clang/ClangModulesDeclVendor.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 2d778e410b0e73..024fc75a5dd590 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -67,12 +67,11 @@ class StoringDiagnosticConsumer : public clang::DiagnosticConsumer { std::vector m_diagnostics; /// The DiagnosticPrinter used for creating the full diagnostic messages /// that are stored in m_diagnostics. - std::shared_ptr m_diag_printer; + std::unique_ptr m_diag_printer; /// Output stream of m_diag_printer. - std::shared_ptr m_os; + std::unique_ptr m_os; /// Output string filled by m_os. Will be reused for different diagnostics. std::string m_output; - Log *m_log; /// A Progress with explicitly managed lifetime. 
std::unique_ptr m_current_progress_up; std::vector m_module_build_stack; @@ -134,12 +133,10 @@ class ClangModulesDeclVendorImpl : public ClangModulesDeclVendor { } // anonymous namespace StoringDiagnosticConsumer::StoringDiagnosticConsumer() { - m_log = GetLog(LLDBLog::Expressions); - - clang::DiagnosticOptions *m_options = new clang::DiagnosticOptions(); - m_os = std::make_shared(m_output); + auto *options = new clang::DiagnosticOptions(); + m_os = std::make_unique(m_output); m_diag_printer = - std::make_shared(*m_os, m_options); + std::make_unique(*m_os, options); } void StoringDiagnosticConsumer::HandleDiagnostic( From 9e4f289bd6c905a2a436b3311ca49ad2d6328060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 7 Mar 2024 19:37:24 +0100 Subject: [PATCH 067/158] [clang][Interp][NFC] Add [[nodiscard]] attribute to emit functions --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 3 ++- clang/utils/TableGen/ClangOpcodesEmitter.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 712218f5de2e42..a384e191464fea 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1646,7 +1646,8 @@ bool ByteCodeExprGen::VisitMaterializeTemporaryExpr( SubExpr, *SubExprT, /*IsConst=*/true, /*IsExtended=*/true)) { if (!this->visit(SubExpr)) return false; - this->emitSetLocal(*SubExprT, *LocalIndex, E); + if (!this->emitSetLocal(*SubExprT, *LocalIndex, E)) + return false; return this->emitGetPtrLocal(*LocalIndex, E); } } else { diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp index 1c41301ab3aeeb..120e1e2efa32b4 100644 --- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp +++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp @@ -274,7 +274,7 @@ void ClangOpcodesEmitter::EmitGroup(raw_ostream &OS, StringRef N, // Emit the prototype of the group emitter in the header. 
OS << "#if defined(GET_EVAL_PROTO) || defined(GET_LINK_PROTO)\n"; - OS << "bool " << EmitFuncName << "("; + OS << "[[nodiscard]] bool " << EmitFuncName << "("; for (size_t I = 0, N = Types->size(); I < N; ++I) OS << "PrimType, "; for (auto *Arg : Args) From 41572177d129bf19f13f077a30b582fd3b8f790c Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Thu, 7 Mar 2024 22:38:44 +0400 Subject: [PATCH 068/158] [clang] Add CodeGen tests for CWG 5xx issues (#84303) This patch covers [CWG519](https://cplusplus.github.io/CWG/issues/519.html) "Null pointer preservation in `void*` conversions", [CWG571](https://cplusplus.github.io/CWG/issues/571.html) "References declared const". --- clang/test/CXX/drs/dr519.cpp | 36 ++++++++++++++++++++++++++++++++++++ clang/test/CXX/drs/dr571.cpp | 20 ++++++++++++++++++++ clang/test/CXX/drs/dr5xx.cpp | 19 ++----------------- clang/www/cxx_dr_status.html | 4 ++-- 4 files changed, 60 insertions(+), 19 deletions(-) create mode 100644 clang/test/CXX/drs/dr519.cpp create mode 100644 clang/test/CXX/drs/dr571.cpp diff --git a/clang/test/CXX/drs/dr519.cpp b/clang/test/CXX/drs/dr519.cpp new file mode 100644 index 00000000000000..67c01d95ef7c6f --- /dev/null +++ b/clang/test/CXX/drs/dr519.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple 
x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK + +namespace dr519 { // dr519: 2.7 +void f() { + int *a = 0; + void *v = a; + bool c1 = v == static_cast(0); + + void *w = 0; + int *b = static_cast(w); + bool c2 = b == static_cast(0); +} +} // namespace dr519 + +// We're checking that `null`s that were initially stored in `a` and `w` +// are simply copied over all the way to respective comparisons with `null`. + +// CHECK-LABEL: define {{.*}} void @dr519::f()() +// CHECK: store ptr null, ptr [[A:%.+]], +// CHECK-NEXT: [[TEMP_A:%.+]] = load ptr, ptr [[A]] +// CHECK-NEXT: store ptr [[TEMP_A]], ptr [[V:%.+]], +// CHECK-NEXT: [[TEMP_V:%.+]] = load ptr, ptr [[V]] +// CHECK-NEXT: {{.+}} = icmp eq ptr [[TEMP_V]], null + +// CHECK: store ptr null, ptr [[W:%.+]], +// CHECK-NEXT: [[TEMP_W:%.+]] = load ptr, ptr [[W]] +// CHECK-NEXT: store ptr [[TEMP_W]], ptr [[B:%.+]], +// CHECK-NEXT: [[TEMP_B:%.+]] = load ptr, ptr [[B]] +// CHECK-NEXT: {{.+}} = icmp eq ptr [[TEMP_B]], null +// CHECK-LABEL: } diff --git a/clang/test/CXX/drs/dr571.cpp b/clang/test/CXX/drs/dr571.cpp new file mode 100644 index 00000000000000..19a85b7ddc3508 --- /dev/null +++ b/clang/test/CXX/drs/dr571.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// 
RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK + +namespace dr571 { // dr571: 2.7 + typedef int &ir; + int n; + const ir r = n; + // expected-warning@-1 {{'const' qualifier on reference type 'ir' (aka 'int &') has no effect}} + ir r2 = n; +} + +// Entities have external linkage by default. + +// CHECK: @dr571::r = constant ptr @dr571::n +// CHECK: @dr571::r2 = constant ptr @dr571::n diff --git a/clang/test/CXX/drs/dr5xx.cpp b/clang/test/CXX/drs/dr5xx.cpp index 0e1de342f6706f..426b368b390ae6 100644 --- a/clang/test/CXX/drs/dr5xx.cpp +++ b/clang/test/CXX/drs/dr5xx.cpp @@ -141,15 +141,7 @@ namespace dr518 { // dr518: yes c++11 // cxx98-error@-1 {{commas at the end of enumerator lists are a C++11 extension}} } -namespace dr519 { // dr519: yes -// FIXME: Add a codegen test. -#if __cplusplus >= 201103L -#define fold(x) (__builtin_constant_p(x) ? (x) : (x)) - int test[fold((int*)(void*)0) ? 
-1 : 1]; -#undef fold -#endif -} - +// dr519 is in dr519.cpp // dr520: na // dr521: no @@ -800,14 +792,7 @@ namespace dr570 { // dr570: dup 633 // expected-note@#dr570-r {{previous definition is here}} } -namespace dr571 { // dr571 unknown - // FIXME: Add a codegen test. - typedef int &ir; - int n; - // FIXME: Test if this has internal linkage. - const ir r = n; - // expected-warning@-1 {{'const' qualifier on reference type 'ir' (aka 'int &') has no effect}} -} +// dr571 is in dr571.cpp namespace dr572 { // dr572: yes enum E { a = 1, b = 2 }; diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 774c71bc1cb6b7..503472a2cae4eb 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -3154,7 +3154,7 @@

C++ defect report implementation status

519 CD1 Null pointer preservation in void* conversions - Yes + Clang 2.7 520 @@ -3468,7 +3468,7 @@

C++ defect report implementation status

571 CD2 References declared const - Unknown + Clang 2.7 572 From a6a6fca7911feab8325129ea57247303b3c8d558 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 7 Mar 2024 10:52:43 -0800 Subject: [PATCH 069/158] [ubsan][pgo] Pass to remove ubsan checks based on profile data (#83471) UBSAN checks can be too expensive to be used in release binaries. However, not all code affects performance in the same way. By removing a small number of checks in hot code, we can reduce the performance loss while preserving most of the checks. --- .../Instrumentation/RemoveTrapsPass.h | 32 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/RemoveTrapsPass.cpp | 104 +++++ .../Transforms/RemoveTraps/remove-traps.ll | 397 ++++++++++++++++++ 6 files changed, 536 insertions(+) create mode 100644 llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h create mode 100644 llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp create mode 100644 llvm/test/Transforms/RemoveTraps/remove-traps.ll diff --git a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h new file mode 100644 index 00000000000000..58f6bbcec5dc9d --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h @@ -0,0 +1,32 @@ +//===- RemoveTrapsPass.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for the pass responsible for removing +/// expensive ubsan checks.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +// This pass is responsible for removing optional traps, like llvm.ubsantrap +// from the hot code. +class RemoveTrapsPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e0bc57f8bf72f0..4d1eb10d2d41c6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -177,6 +177,7 @@ #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index a345e8d72d9399..41f16d0915bf23 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -422,6 +422,7 @@ FUNCTION_PASS("print", UniformityInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) +FUNCTION_PASS("remove-traps", RemoveTrapsPass()) FUNCTION_PASS("safe-stack", SafeStackPass(TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt 
b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index ee9aa73ff03403..b23a6ed1f08415 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_component_library(LLVMInstrumentation PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp PoisonChecking.cpp + RemoveTrapsPass.cpp SanitizerCoverage.cpp SanitizerBinaryMetadata.cpp ValueProfileCollector.cpp diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp new file mode 100644 index 00000000000000..d87f7482a21d25 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -0,0 +1,104 @@ +//===- RemoveTrapsPass.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "remove-traps" + +static cl::opt HotPercentileCutoff( + "remove-traps-percentile-cutoff-hot", cl::init(0), + cl::desc("Alternative hot percentile cuttoff. 
By default " + "`-profile-summary-cutoff-hot` is used.")); + +static cl::opt + RandomRate("remove-traps-random-rate", cl::init(0.0), + cl::desc("Probability value in the range [0.0, 1.0] of " + "unconditional pseudo-random checks removal.")); + +STATISTIC(NumChecksTotal, "Number of checks"); +STATISTIC(NumChecksRemoved, "Number of removed checks"); + +static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, + const ProfileSummaryInfo *PSI) { + SmallVector Remove; + std::unique_ptr Rng; + + auto ShouldRemove = [&](bool IsHot) { + if (!RandomRate.getNumOccurrences()) + return IsHot; + if (!Rng) + Rng = F.getParent()->createRNG(F.getName()); + std::bernoulli_distribution D(RandomRate); + return D(*Rng); + }; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + IntrinsicInst *II = dyn_cast(&I); + if (!II) + continue; + auto ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::ubsantrap: { + ++NumChecksTotal; + + bool IsHot = false; + if (PSI) { + uint64_t Count = 0; + for (const auto *PR : predecessors(&BB)) + Count += BFI.getBlockProfileCount(PR).value_or(0); + + IsHot = + HotPercentileCutoff.getNumOccurrences() + ? (HotPercentileCutoff > 0 && + PSI->isHotCountNthPercentile(HotPercentileCutoff, Count)) + : PSI->isHotCount(Count); + } + + if (ShouldRemove(IsHot)) { + Remove.push_back(II); + ++NumChecksRemoved; + } + break; + } + default: + break; + } + } + } + + for (IntrinsicInst *I : Remove) + I->eraseFromParent(); + + return !Remove.empty(); +} + +PreservedAnalyses RemoveTrapsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (F.isDeclaration()) + return PreservedAnalyses::all(); + auto &MAMProxy = AM.getResult(F); + ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult(*F.getParent()); + BlockFrequencyInfo &BFI = AM.getResult(F); + + return removeUbsanTraps(F, BFI, PSI) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll b/llvm/test/Transforms/RemoveTraps/remove-traps.ll new file mode 100644 index 00000000000000..71549e7d9b4122 --- /dev/null +++ b/llvm/test/Transforms/RemoveTraps/remove-traps.ll @@ -0,0 +1,397 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes='function(remove-traps)' -S | FileCheck %s --check-prefixes=NOPROFILE +; RUN: opt < %s -passes='function(remove-traps)' -remove-traps-random-rate=1 -S | FileCheck %s --check-prefixes=ALL +; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT +; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 + +target triple = "x86_64-pc-linux-gnu" + +declare void @llvm.ubsantrap(i8 immarg) + +define dso_local noundef i32 @simple(ptr noundef readonly %0) { +; NOPROFILE-LABEL: define dso_local noundef i32 @simple( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @simple( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @simple( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr 
[[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT-NEXT: unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @simple( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @hot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @hot( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @hot( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT-NEXT: [[TMP2:%.*]] = 
icmp eq ptr [[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @hot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + +define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @veryHot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @veryHot( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @veryHot( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT-NEXT: [[TMP2:%.*]] = icmp 
eq ptr [[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @veryHot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readonly %1) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchColdFnHot( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; ALL-LABEL: define dso_local noundef i32 @branchColdFnHot( +; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[TMP3]], 
label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL: 6: +; ALL-NEXT: unreachable +; ALL: 7: +; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; ALL-NEXT: br label [[TMP9]] +; ALL: 9: +; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; ALL-NEXT: ret i32 [[TMP10]] +; +; HOT-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT: 6: +; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT-NEXT: unreachable +; HOT: 7: +; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT-NEXT: br label [[TMP9]] +; HOT: 9: +; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = 
icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !38 + +4: + %5 = icmp eq ptr %1, null + br i1 %5, label %6, label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readonly %1) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchHotFnCold( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; ALL-LABEL: define dso_local noundef i32 @branchHotFnCold( +; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL: 6: +; ALL-NEXT: unreachable +; ALL: 7: +; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; ALL-NEXT: br label [[TMP9]] +; ALL: 9: +; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; ALL-NEXT: ret i32 [[TMP10]] +; +; HOT-LABEL: define dso_local noundef i32 
@branchHotFnCold( +; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT: 6: +; HOT-NEXT: unreachable +; HOT: 7: +; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT-NEXT: br label [[TMP9]] +; HOT: 9: +; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !37 + +4: + %5 = icmp eq ptr %1, null + br i1 %5, label %6, label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +!llvm.module.flags = !{!6} +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 30000} +!10 = !{!"MaxCount", i64 10000} +!11 = !{!"MaxInternalCount", i64 
10000} +!12 = !{!"MaxFunctionCount", i64 10000} +!13 = !{!"NumCounts", i64 3} +!14 = !{!"NumFunctions", i64 5} +!17 = !{!"DetailedSummary", !18} +!18 = !{!19, !29, !30, !32, !34} +!19 = !{i32 10000, i64 10000, i32 3} +!29 = !{i32 950000, i64 5000, i32 3} +!30 = !{i32 990000, i64 500, i32 4} +!32 = !{i32 999900, i64 250, i32 4} +!34 = !{i32 999999, i64 1, i32 6} + +!36 = !{!"function_entry_count", i64 1000} +!39 = !{!"function_entry_count", i64 7000} + +!37 = !{!"branch_weights", i32 1, i32 1000} +!38 = !{!"branch_weights", i32 1000, i32 1} + +;. +; NOPROFILE: [[PROF16]] = !{!"function_entry_count", i64 1000} +; NOPROFILE: [[PROF17]] = !{!"function_entry_count", i64 7000} +; NOPROFILE: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; NOPROFILE: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; ALL: [[PROF16]] = !{!"function_entry_count", i64 1000} +; ALL: [[PROF17]] = !{!"function_entry_count", i64 7000} +; ALL: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; ALL: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT70: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT70: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT70: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT70: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. From 54c955b828bbdcf46586556339cbd3cf8f205b4f Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 29 Feb 2024 15:06:28 -0800 Subject: [PATCH 070/158] Change the return type of ValueObject::CalculateNumChildren to uint32_t. In the end this value comes from TypeSystem::GetNumChildren which returns a uint32_t, so ValueObject should be consistent with that. 
--- lldb/include/lldb/Core/ValueObject.h | 2 +- lldb/include/lldb/Core/ValueObjectCast.h | 2 +- lldb/include/lldb/Core/ValueObjectChild.h | 2 +- lldb/include/lldb/Core/ValueObjectConstResult.h | 2 +- lldb/include/lldb/Core/ValueObjectDynamicValue.h | 2 +- lldb/include/lldb/Core/ValueObjectMemory.h | 2 +- lldb/include/lldb/Core/ValueObjectRegister.h | 4 ++-- lldb/include/lldb/Core/ValueObjectSyntheticFilter.h | 2 +- lldb/include/lldb/Core/ValueObjectVTable.h | 2 +- lldb/include/lldb/Core/ValueObjectVariable.h | 2 +- lldb/include/lldb/Target/StackFrameRecognizer.h | 2 +- lldb/source/Core/ValueObjectCast.cpp | 2 +- lldb/source/Core/ValueObjectChild.cpp | 2 +- lldb/source/Core/ValueObjectConstResult.cpp | 2 +- lldb/source/Core/ValueObjectDynamicValue.cpp | 2 +- lldb/source/Core/ValueObjectMemory.cpp | 2 +- lldb/source/Core/ValueObjectRegister.cpp | 4 ++-- lldb/source/Core/ValueObjectSyntheticFilter.cpp | 2 +- lldb/source/Core/ValueObjectVTable.cpp | 4 ++-- lldb/source/Core/ValueObjectVariable.cpp | 2 +- 20 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 4c0b0b2dae6cd4..05dd64f5634fda 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -958,7 +958,7 @@ class ValueObject { int32_t synthetic_index); /// Should only be called by ValueObject::GetNumChildren(). 
- virtual size_t CalculateNumChildren(uint32_t max = UINT32_MAX) = 0; + virtual uint32_t CalculateNumChildren(uint32_t max = UINT32_MAX) = 0; void SetNumChildren(size_t num_children); diff --git a/lldb/include/lldb/Core/ValueObjectCast.h b/lldb/include/lldb/Core/ValueObjectCast.h index fe053c12d9c343..51c647680d5227 100644 --- a/lldb/include/lldb/Core/ValueObjectCast.h +++ b/lldb/include/lldb/Core/ValueObjectCast.h @@ -33,7 +33,7 @@ class ValueObjectCast : public ValueObject { std::optional GetByteSize() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectChild.h b/lldb/include/lldb/Core/ValueObjectChild.h index 46b14e6840f0dc..47a13be08bb83b 100644 --- a/lldb/include/lldb/Core/ValueObjectChild.h +++ b/lldb/include/lldb/Core/ValueObjectChild.h @@ -39,7 +39,7 @@ class ValueObjectChild : public ValueObject { lldb::ValueType GetValueType() const override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ConstString GetTypeName() override; diff --git a/lldb/include/lldb/Core/ValueObjectConstResult.h b/lldb/include/lldb/Core/ValueObjectConstResult.h index d61df859bebce4..9f1246cf2a7874 100644 --- a/lldb/include/lldb/Core/ValueObjectConstResult.h +++ b/lldb/include/lldb/Core/ValueObjectConstResult.h @@ -67,7 +67,7 @@ class ValueObjectConstResult : public ValueObject { lldb::ValueType GetValueType() const override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ConstString GetTypeName() override; diff --git a/lldb/include/lldb/Core/ValueObjectDynamicValue.h b/lldb/include/lldb/Core/ValueObjectDynamicValue.h index 2758b4e5bb564d..21a9b409fd5bd7 100644 --- a/lldb/include/lldb/Core/ValueObjectDynamicValue.h +++ b/lldb/include/lldb/Core/ValueObjectDynamicValue.h @@ -43,7 +43,7 @@ class 
ValueObjectDynamicValue : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectMemory.h b/lldb/include/lldb/Core/ValueObjectMemory.h index 3c01df388d2e6d..a74b325546b03c 100644 --- a/lldb/include/lldb/Core/ValueObjectMemory.h +++ b/lldb/include/lldb/Core/ValueObjectMemory.h @@ -47,7 +47,7 @@ class ValueObjectMemory : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectRegister.h b/lldb/include/lldb/Core/ValueObjectRegister.h index 2e47eee3d7f793..6c470c1a686503 100644 --- a/lldb/include/lldb/Core/ValueObjectRegister.h +++ b/lldb/include/lldb/Core/ValueObjectRegister.h @@ -47,7 +47,7 @@ class ValueObjectRegisterSet : public ValueObject { ConstString GetQualifiedTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ValueObject *CreateChildAtIndex(size_t idx, bool synthetic_array_member, int32_t synthetic_index) override; @@ -95,7 +95,7 @@ class ValueObjectRegister : public ValueObject { ConstString GetTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; bool SetValueFromCString(const char *value_str, Status &error) override; diff --git a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h index 67596232eafd1e..57794072ff9229 100644 --- a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h +++ b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h @@ -47,7 +47,7 @@ class ValueObjectSynthetic : public ValueObject { bool MightHaveChildren() 
override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectVTable.h b/lldb/include/lldb/Core/ValueObjectVTable.h index 217ff8d0d334ce..e7e14fc83d7892 100644 --- a/lldb/include/lldb/Core/ValueObjectVTable.h +++ b/lldb/include/lldb/Core/ValueObjectVTable.h @@ -64,7 +64,7 @@ class ValueObjectVTable : public ValueObject { std::optional GetByteSize() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ValueObject *CreateChildAtIndex(size_t idx, bool synthetic_array_member, int32_t synthetic_index) override; diff --git a/lldb/include/lldb/Core/ValueObjectVariable.h b/lldb/include/lldb/Core/ValueObjectVariable.h index bba28ce567b2a0..da270300df0b30 100644 --- a/lldb/include/lldb/Core/ValueObjectVariable.h +++ b/lldb/include/lldb/Core/ValueObjectVariable.h @@ -46,7 +46,7 @@ class ValueObjectVariable : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Target/StackFrameRecognizer.h b/lldb/include/lldb/Target/StackFrameRecognizer.h index 419f0c0aac1f86..e111f4a4dc7029 100644 --- a/lldb/include/lldb/Target/StackFrameRecognizer.h +++ b/lldb/include/lldb/Target/StackFrameRecognizer.h @@ -164,7 +164,7 @@ class ValueObjectRecognizerSynthesizedValue : public ValueObject { m_value = m_parent->GetValue(); return true; } - size_t CalculateNumChildren(uint32_t max = UINT32_MAX) override { + uint32_t CalculateNumChildren(uint32_t max = UINT32_MAX) override { return m_parent->GetNumChildren(max); } CompilerType GetCompilerTypeImpl() override { diff --git a/lldb/source/Core/ValueObjectCast.cpp b/lldb/source/Core/ValueObjectCast.cpp index 0882d4b3677619..a5c555f86b1372 
100644 --- a/lldb/source/Core/ValueObjectCast.cpp +++ b/lldb/source/Core/ValueObjectCast.cpp @@ -41,7 +41,7 @@ ValueObjectCast::~ValueObjectCast() = default; CompilerType ValueObjectCast::GetCompilerTypeImpl() { return m_cast_type; } -size_t ValueObjectCast::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectCast::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren( true, &exe_ctx); diff --git a/lldb/source/Core/ValueObjectChild.cpp b/lldb/source/Core/ValueObjectChild.cpp index 39067387dc9782..2e55dd7726bdd9 100644 --- a/lldb/source/Core/ValueObjectChild.cpp +++ b/lldb/source/Core/ValueObjectChild.cpp @@ -49,7 +49,7 @@ lldb::ValueType ValueObjectChild::GetValueType() const { return m_parent->GetValueType(); } -size_t ValueObjectChild::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectChild::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? children_count : max; diff --git a/lldb/source/Core/ValueObjectConstResult.cpp b/lldb/source/Core/ValueObjectConstResult.cpp index 693da1a551f8eb..5c7aa4452b70db 100644 --- a/lldb/source/Core/ValueObjectConstResult.cpp +++ b/lldb/source/Core/ValueObjectConstResult.cpp @@ -216,7 +216,7 @@ std::optional ValueObjectConstResult::GetByteSize() { void ValueObjectConstResult::SetByteSize(size_t size) { m_byte_size = size; } -size_t ValueObjectConstResult::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectConstResult::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? 
children_count : max; diff --git a/lldb/source/Core/ValueObjectDynamicValue.cpp b/lldb/source/Core/ValueObjectDynamicValue.cpp index e6e30dce9d1e4a..4e64760371ae52 100644 --- a/lldb/source/Core/ValueObjectDynamicValue.cpp +++ b/lldb/source/Core/ValueObjectDynamicValue.cpp @@ -85,7 +85,7 @@ ConstString ValueObjectDynamicValue::GetDisplayTypeName() { return m_parent->GetDisplayTypeName(); } -size_t ValueObjectDynamicValue::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectDynamicValue::CalculateNumChildren(uint32_t max) { const bool success = UpdateValueIfNeeded(false); if (success && m_dynamic_type_info.HasType()) { ExecutionContext exe_ctx(GetExecutionContextRef()); diff --git a/lldb/source/Core/ValueObjectMemory.cpp b/lldb/source/Core/ValueObjectMemory.cpp index 3f125a7bee8c77..7f68236c7884ec 100644 --- a/lldb/source/Core/ValueObjectMemory.cpp +++ b/lldb/source/Core/ValueObjectMemory.cpp @@ -126,7 +126,7 @@ ConstString ValueObjectMemory::GetDisplayTypeName() { return m_compiler_type.GetDisplayTypeName(); } -size_t ValueObjectMemory::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectMemory::CalculateNumChildren(uint32_t max) { if (m_type_sp) { auto child_count = m_type_sp->GetNumChildren(true); return child_count <= max ? 
child_count : max; diff --git a/lldb/source/Core/ValueObjectRegister.cpp b/lldb/source/Core/ValueObjectRegister.cpp index c2b84c11347359..d4c144cc7edb9a 100644 --- a/lldb/source/Core/ValueObjectRegister.cpp +++ b/lldb/source/Core/ValueObjectRegister.cpp @@ -74,7 +74,7 @@ ConstString ValueObjectRegisterSet::GetQualifiedTypeName() { return ConstString(); } -size_t ValueObjectRegisterSet::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectRegisterSet::CalculateNumChildren(uint32_t max) { const RegisterSet *reg_set = m_reg_ctx_sp->GetRegisterSet(m_reg_set_idx); if (reg_set) { auto reg_count = reg_set->num_registers; @@ -220,7 +220,7 @@ ConstString ValueObjectRegister::GetTypeName() { return m_type_name; } -size_t ValueObjectRegister::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectRegister::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? 
children_count : max; diff --git a/lldb/source/Core/ValueObjectSyntheticFilter.cpp b/lldb/source/Core/ValueObjectSyntheticFilter.cpp index e8b4b02d11a0bb..ae358fba4bd815 100644 --- a/lldb/source/Core/ValueObjectSyntheticFilter.cpp +++ b/lldb/source/Core/ValueObjectSyntheticFilter.cpp @@ -84,7 +84,7 @@ ConstString ValueObjectSynthetic::GetDisplayTypeName() { return m_parent->GetDisplayTypeName(); } -size_t ValueObjectSynthetic::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectSynthetic::CalculateNumChildren(uint32_t max) { Log *log = GetLog(LLDBLog::DataFormatters); UpdateValueIfNeeded(); diff --git a/lldb/source/Core/ValueObjectVTable.cpp b/lldb/source/Core/ValueObjectVTable.cpp index 177ae4167a1d45..4d1cbb8d2f6fc2 100644 --- a/lldb/source/Core/ValueObjectVTable.cpp +++ b/lldb/source/Core/ValueObjectVTable.cpp @@ -33,7 +33,7 @@ class ValueObjectVTableChild : public ValueObject { std::optional GetByteSize() override { return m_addr_size; }; - size_t CalculateNumChildren(uint32_t max) override { return 0; }; + uint32_t CalculateNumChildren(uint32_t max) override { return 0; }; ValueType GetValueType() const override { return eValueTypeVTableEntry; }; @@ -159,7 +159,7 @@ std::optional ValueObjectVTable::GetByteSize() { return std::nullopt; } -size_t ValueObjectVTable::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectVTable::CalculateNumChildren(uint32_t max) { if (UpdateValueIfNeeded(false)) return m_num_vtable_entries <= max ? 
m_num_vtable_entries : max; return 0; diff --git a/lldb/source/Core/ValueObjectVariable.cpp b/lldb/source/Core/ValueObjectVariable.cpp index 9f8df847f28a8e..dc62bb6358dc97 100644 --- a/lldb/source/Core/ValueObjectVariable.cpp +++ b/lldb/source/Core/ValueObjectVariable.cpp @@ -94,7 +94,7 @@ ConstString ValueObjectVariable::GetQualifiedTypeName() { return ConstString(); } -size_t ValueObjectVariable::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectVariable::CalculateNumChildren(uint32_t max) { CompilerType type(GetCompilerType()); if (!type.IsValid()) From 3d7c5b80e38b01223811eb557a5e9953cfa2154d Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 29 Feb 2024 15:35:35 -0800 Subject: [PATCH 071/158] Change the return type of SyntheticFrontend::CalculateNumChildren to int32_t This way it is consistent with ValueObject and TypeSystem. --- .../lldb/DataFormatters/TypeSynthetic.h | 12 +++++----- .../lldb/DataFormatters/VectorIterator.h | 2 +- .../Core/ValueObjectSyntheticFilter.cpp | 2 +- lldb/source/DataFormatters/TypeSynthetic.cpp | 4 ++-- lldb/source/DataFormatters/VectorType.cpp | 2 +- .../Language/CPlusPlus/BlockPointer.cpp | 2 +- .../Plugins/Language/CPlusPlus/Coroutines.cpp | 2 +- .../Plugins/Language/CPlusPlus/Coroutines.h | 2 +- .../Language/CPlusPlus/GenericBitset.cpp | 2 +- .../Language/CPlusPlus/GenericOptional.cpp | 2 +- .../Plugins/Language/CPlusPlus/LibCxx.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibCxx.h | 8 +++---- .../Language/CPlusPlus/LibCxxAtomic.cpp | 4 ++-- .../CPlusPlus/LibCxxInitializerList.cpp | 4 ++-- .../Plugins/Language/CPlusPlus/LibCxxList.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibCxxMap.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxQueue.cpp | 2 +- .../CPlusPlus/LibCxxRangesRefView.cpp | 2 +- .../Plugins/Language/CPlusPlus/LibCxxSpan.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxTuple.cpp | 2 +- .../Language/CPlusPlus/LibCxxUnorderedMap.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxValarray.cpp | 4 ++-- 
.../Language/CPlusPlus/LibCxxVariant.cpp | 2 +- .../Language/CPlusPlus/LibCxxVector.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibStdcpp.cpp | 10 ++++---- .../Language/CPlusPlus/LibStdcppTuple.cpp | 4 ++-- .../CPlusPlus/LibStdcppUniquePointer.cpp | 4 ++-- lldb/source/Plugins/Language/ObjC/Cocoa.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSArray.cpp | 18 +++++++------- .../Plugins/Language/ObjC/NSDictionary.cpp | 24 +++++++++---------- lldb/source/Plugins/Language/ObjC/NSError.cpp | 2 +- .../Plugins/Language/ObjC/NSException.cpp | 2 +- .../Plugins/Language/ObjC/NSIndexPath.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSSet.cpp | 14 +++++------ 34 files changed, 89 insertions(+), 89 deletions(-) diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h index 23cc054b399a67..7bb011c1579449 100644 --- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h +++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h @@ -38,9 +38,9 @@ class SyntheticChildrenFrontEnd { virtual ~SyntheticChildrenFrontEnd() = default; - virtual size_t CalculateNumChildren() = 0; + virtual uint32_t CalculateNumChildren() = 0; - virtual size_t CalculateNumChildren(uint32_t max) { + virtual uint32_t CalculateNumChildren(uint32_t max) { auto count = CalculateNumChildren(); return count <= max ? 
count : max; } @@ -109,7 +109,7 @@ class SyntheticValueProviderFrontEnd : public SyntheticChildrenFrontEnd { ~SyntheticValueProviderFrontEnd() override = default; - size_t CalculateNumChildren() override { return 0; } + uint32_t CalculateNumChildren() override { return 0; } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return nullptr; } @@ -322,7 +322,7 @@ class TypeFilterImpl : public SyntheticChildren { ~FrontEnd() override = default; - size_t CalculateNumChildren() override { return filter->GetCount(); } + uint32_t CalculateNumChildren() override { return filter->GetCount(); } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { if (idx >= filter->GetCount()) @@ -426,9 +426,9 @@ class ScriptedSyntheticChildren : public SyntheticChildren { bool IsValid(); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; diff --git a/lldb/include/lldb/DataFormatters/VectorIterator.h b/lldb/include/lldb/DataFormatters/VectorIterator.h index 5f774bb72c3a3a..88500b0bfdd400 100644 --- a/lldb/include/lldb/DataFormatters/VectorIterator.h +++ b/lldb/include/lldb/DataFormatters/VectorIterator.h @@ -24,7 +24,7 @@ class VectorIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { VectorIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp, llvm::ArrayRef item_names); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; diff --git a/lldb/source/Core/ValueObjectSyntheticFilter.cpp b/lldb/source/Core/ValueObjectSyntheticFilter.cpp index ae358fba4bd815..b03bd9a80e506e 100644 --- a/lldb/source/Core/ValueObjectSyntheticFilter.cpp +++ b/lldb/source/Core/ValueObjectSyntheticFilter.cpp @@ -31,7 +31,7 @@ class DummySyntheticFrontEnd : public SyntheticChildrenFrontEnd { 
DummySyntheticFrontEnd(ValueObject &backend) : SyntheticChildrenFrontEnd(backend) {} - size_t CalculateNumChildren() override { return m_backend.GetNumChildren(); } + uint32_t CalculateNumChildren() override { return m_backend.GetNumChildren(); } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return m_backend.GetChildAtIndex(idx); diff --git a/lldb/source/DataFormatters/TypeSynthetic.cpp b/lldb/source/DataFormatters/TypeSynthetic.cpp index 8a6f132a39577a..a05fbe9a73c263 100644 --- a/lldb/source/DataFormatters/TypeSynthetic.cpp +++ b/lldb/source/DataFormatters/TypeSynthetic.cpp @@ -178,13 +178,13 @@ bool ScriptedSyntheticChildren::FrontEnd::IsValid() { return (m_wrapper_sp && m_wrapper_sp->IsValid() && m_interpreter); } -size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren() { +uint32_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren() { if (!m_wrapper_sp || m_interpreter == nullptr) return 0; return m_interpreter->CalculateNumChildren(m_wrapper_sp, UINT32_MAX); } -size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren(uint32_t max) { +uint32_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren(uint32_t max) { if (!m_wrapper_sp || m_interpreter == nullptr) return 0; return m_interpreter->CalculateNumChildren(m_wrapper_sp, max); diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp index c94ca68319ee2c..6f1f0e5b058fdf 100644 --- a/lldb/source/DataFormatters/VectorType.cpp +++ b/lldb/source/DataFormatters/VectorType.cpp @@ -224,7 +224,7 @@ class VectorTypeSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~VectorTypeSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { return m_num_children; } + uint32_t CalculateNumChildren() override { return m_num_children; } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { if (idx >= CalculateNumChildren()) diff --git a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp 
b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp index 2e43aa3fa1d8bf..f8e47efa7d9164 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp @@ -74,7 +74,7 @@ class BlockPointerSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~BlockPointerSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { const bool omit_empty_base_classes = false; return m_block_struct_type.GetNumChildren(omit_empty_base_classes, nullptr); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp index 742017438bcf4a..ea1218350f6c77 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp @@ -104,7 +104,7 @@ lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: ~StdlibCoroutineHandleSyntheticFrontEnd() = default; -size_t lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: +uint32_t lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: CalculateNumChildren() { if (!m_resume_ptr_sp || !m_destroy_ptr_sp) return 0; diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h index d38c7ecefa6e13..8ec2c14d2e9e96 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h @@ -34,7 +34,7 @@ class StdlibCoroutineHandleSyntheticFrontEnd ~StdlibCoroutineHandleSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index 
ac316638523584..c8fc3af4a9d4a2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -34,7 +34,7 @@ class GenericBitsetFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_elements.size(); } + uint32_t CalculateNumChildren() override { return m_elements.size(); } ValueObjectSP GetChildAtIndex(size_t idx) override; private: diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp index 57331eaa986890..34ae1a24892136 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp @@ -41,7 +41,7 @@ class GenericOptionalFrontend : public SyntheticChildrenFrontEnd { } bool MightHaveChildren() override { return true; } - size_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } + uint32_t CalculateNumChildren() override { return m_has_value ? 
1U : 0U; } ValueObjectSP GetChildAtIndex(size_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index 7893aa7cc1f9df..710b235ce957f2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -351,7 +351,7 @@ lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -size_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: CalculateNumChildren() { return 2; } @@ -509,7 +509,7 @@ lldb::ChildCacheState lldb_private::formatters:: return lldb::ChildCacheState::eRefetch; } -size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: CalculateNumChildren() { return 2; } @@ -566,7 +566,7 @@ lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: CalculateNumChildren() { return (m_cntrl ? 1 : 0); } @@ -661,7 +661,7 @@ lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEndCreator( : nullptr); } -size_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: CalculateNumChildren() { if (m_value_ptr_sp) return m_deleter_sp ? 
2 : 1; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index d823fbd76222db..af4f313a905093 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -87,7 +87,7 @@ class LibCxxMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibCxxMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -135,7 +135,7 @@ class LibCxxUnorderedMapIteratorSyntheticFrontEnd ~LibCxxUnorderedMapIteratorSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -166,7 +166,7 @@ class LibcxxSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxSharedPtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -186,7 +186,7 @@ class LibcxxUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxUniquePtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp index c81b1e8012f6a9..484d756a300e30 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp @@ -90,7 +90,7 @@ class LibcxxStdAtomicSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdAtomicSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t 
CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -124,7 +124,7 @@ bool lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: return true; } -size_t lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: CalculateNumChildren() { return m_real_child ? 1 : 0; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp index 3c33f94f923734..ef94815b3afc6d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp @@ -26,7 +26,7 @@ class LibcxxInitializerListSyntheticFrontEnd ~LibcxxInitializerListSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -59,7 +59,7 @@ lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: // delete m_start; } -size_t lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: CalculateNumChildren() { m_num_elements = 0; ValueObjectSP size_sp(m_backend.GetChildMemberWithName("__size_")); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp index e28ef818b10faf..d05a64b1eba569 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp @@ -136,7 +136,7 @@ class ForwardListFrontEnd : public AbstractListFrontEnd { public: ForwardListFrontEnd(ValueObject &valobj); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; ValueObjectSP GetChildAtIndex(size_t idx) override; lldb::ChildCacheState Update() override; }; @@ -147,7 +147,7 @@ class ListFrontEnd : public 
AbstractListFrontEnd { ~ListFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -240,7 +240,7 @@ ForwardListFrontEnd::ForwardListFrontEnd(ValueObject &valobj) Update(); } -size_t ForwardListFrontEnd::CalculateNumChildren() { +uint32_t ForwardListFrontEnd::CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; @@ -308,7 +308,7 @@ ListFrontEnd::ListFrontEnd(lldb::ValueObjectSP valobj_sp) Update(); } -size_t ListFrontEnd::CalculateNumChildren() { +uint32_t ListFrontEnd::CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; if (!m_head || !m_tail || m_node_address == 0) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index d208acfc9da47e..28a9a26c9d2db2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -177,7 +177,7 @@ class LibcxxStdMapSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdMapSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -209,7 +209,7 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp index 83f93b16fc9a2d..beab453b788adf 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp @@ -28,7 +28,7 @@ class QueueFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return 
true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { return m_container_sp ? m_container_sp->GetNumChildren() : 0; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp index c032d67c66cb47..cda6c5d79c3190 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp @@ -27,7 +27,7 @@ class LibcxxStdRangesRefViewSyntheticFrontEnd ~LibcxxStdRangesRefViewSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { // __range_ will be the sole child of this type return 1; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp index 4ddfaef9c0ad54..5aaf1ec2d56e3d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp @@ -27,7 +27,7 @@ class LibcxxStdSpanSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdSpanSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -73,7 +73,7 @@ lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: CalculateNumChildren() { return m_num_elements; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp index 546871012d2b38..5c42d8551d4fe2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp @@ -26,7 +26,7 @@ class TupleFrontEnd: public 
SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_elements.size(); } + uint32_t CalculateNumChildren() override { return m_elements.size(); } ValueObjectSP GetChildAtIndex(size_t idx) override; private: diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 4cac52f235a19a..0be73a9e633d9a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -33,7 +33,7 @@ class LibcxxStdUnorderedMapSyntheticFrontEnd ~LibcxxStdUnorderedMapSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -62,7 +62,7 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: CalculateNumChildren() { return m_num_elements; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp index 7c8fd25fd9f281..eae7711c8cd9f8 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp @@ -24,7 +24,7 @@ class LibcxxStdValarraySyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdValarraySyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -63,7 +63,7 @@ lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: // delete m_finish; } -size_t lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: +uint32_t 
lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: CalculateNumChildren() { if (!m_start || !m_finish) return 0; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp index ecbb7cf0ca2b46..c704e1b8335eac 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp @@ -205,7 +205,7 @@ class VariantFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_size; } + uint32_t CalculateNumChildren() override { return m_size; } ValueObjectSP GetChildAtIndex(size_t idx) override; private: diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp index 0c3c3f02b60c7b..62ff76b82a2a00 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp @@ -25,7 +25,7 @@ class LibcxxStdVectorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdVectorSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -46,7 +46,7 @@ class LibcxxVectorBoolSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxVectorBoolSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -82,7 +82,7 @@ lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: // delete m_finish; } -size_t lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: CalculateNumChildren() { if (!m_start || !m_finish) return 0; @@ -165,7 +165,7 @@ 
lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: } } -size_t lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: CalculateNumChildren() { return m_count; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp index 411551839e1e61..b2249d23961f6d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp @@ -43,7 +43,7 @@ class LibstdcppMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibstdcppMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -64,7 +64,7 @@ class LibStdcppSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppSharedPtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -132,7 +132,7 @@ lldb::ChildCacheState LibstdcppMapIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eReuse; } -size_t LibstdcppMapIteratorSyntheticFrontEnd::CalculateNumChildren() { +uint32_t LibstdcppMapIteratorSyntheticFrontEnd::CalculateNumChildren() { return 2; } @@ -219,7 +219,7 @@ lldb::ChildCacheState VectorIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -size_t VectorIteratorSyntheticFrontEnd::CalculateNumChildren() { return 1; } +uint32_t VectorIteratorSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP VectorIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { @@ -371,7 +371,7 @@ LibStdcppSharedPtrSyntheticFrontEnd::LibStdcppSharedPtrSyntheticFrontEnd( Update(); } -size_t 
LibStdcppSharedPtrSyntheticFrontEnd::CalculateNumChildren() { return 1; } +uint32_t LibStdcppSharedPtrSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP LibStdcppSharedPtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp index 189f9561e52a1b..ba3116d01a72b8 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp @@ -26,7 +26,7 @@ class LibStdcppTupleSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppTupleSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -95,7 +95,7 @@ LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { return lldb::ValueObjectSP(); } -size_t LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() { +uint32_t LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() { return m_members.size(); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp index 3b0f6329d0e3ff..aca18ee694c0e2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp @@ -26,7 +26,7 @@ class LibStdcppUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppUniquePtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -135,7 +135,7 @@ LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { return lldb::ValueObjectSP(); } -size_t LibStdcppUniquePtrSyntheticFrontEnd::CalculateNumChildren() { +uint32_t 
LibStdcppUniquePtrSyntheticFrontEnd::CalculateNumChildren() { if (m_del_obj) return 2; return 1; diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp index 64047dc53236bf..05b324a30f2d67 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp @@ -1038,7 +1038,7 @@ class ObjCClassSyntheticChildrenFrontEnd : public SyntheticChildrenFrontEnd { ~ObjCClassSyntheticChildrenFrontEnd() override = default; - size_t CalculateNumChildren() override { return 0; } + uint32_t CalculateNumChildren() override { return 0; } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index 09bf7a23d6097e..9ee12e50b8ace9 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -50,7 +50,7 @@ class NSArrayMSyntheticFrontEndBase : public SyntheticChildrenFrontEnd { ~NSArrayMSyntheticFrontEndBase() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -214,7 +214,7 @@ class GenericNSArrayISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSArrayISyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -302,7 +302,7 @@ class NSArray0SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSArray0SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -319,7 +319,7 @@ class NSArray1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSArray1SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + 
uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -477,8 +477,8 @@ lldb_private::formatters:: : NSArrayMSyntheticFrontEndBase(valobj_sp), m_data_32(nullptr), m_data_64(nullptr) {} -size_t -lldb_private::formatters::NSArrayMSyntheticFrontEndBase::CalculateNumChildren() { +uint32_t lldb_private::formatters::NSArrayMSyntheticFrontEndBase:: + CalculateNumChildren() { return GetUsedCount(); } @@ -634,7 +634,7 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: } template -size_t +uint32_t lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: CalculateNumChildren() { return m_data_32 ? m_data_32->used : m_data_64->used; @@ -719,7 +719,7 @@ lldb_private::formatters::NSArray0SyntheticFrontEnd::GetIndexOfChildWithName( return UINT32_MAX; } -size_t +uint32_t lldb_private::formatters::NSArray0SyntheticFrontEnd::CalculateNumChildren() { return 0; } @@ -754,7 +754,7 @@ lldb_private::formatters::NSArray1SyntheticFrontEnd::GetIndexOfChildWithName( return UINT32_MAX; } -size_t +uint32_t lldb_private::formatters::NSArray1SyntheticFrontEnd::CalculateNumChildren() { return 1; } diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 9c252a98de8357..3bb2e4fac02e41 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -103,7 +103,7 @@ class NSDictionaryISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSDictionaryISyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -144,7 +144,7 @@ class NSConstantDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP 
GetChildAtIndex(size_t idx) override; @@ -176,7 +176,7 @@ class NSCFDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSCFDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -209,7 +209,7 @@ class NSDictionary1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSDictionary1SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -230,7 +230,7 @@ class GenericNSDictionaryMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSDictionaryMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -263,7 +263,7 @@ namespace Foundation1100 { ~NSDictionaryMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -606,7 +606,7 @@ size_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -744,7 +744,7 @@ size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: CalculateNumChildren() { if (!m_hashtable.IsValid()) return 0; @@ -880,7 +880,7 @@ size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: +uint32_t 
lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: CalculateNumChildren() { return m_size; } @@ -994,7 +994,7 @@ size_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: return name == g_zero ? 0 : UINT32_MAX; } -size_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: CalculateNumChildren() { return 1; } @@ -1087,7 +1087,7 @@ size_t lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< } template -size_t +uint32_t lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -1250,7 +1250,7 @@ lldb_private::formatters::Foundation1100:: return idx; } -size_t +uint32_t lldb_private::formatters::Foundation1100:: NSDictionaryMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp index ce52ae542a50cb..786d2bc51871a5 100644 --- a/lldb/source/Plugins/Language/ObjC/NSError.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp @@ -116,7 +116,7 @@ class NSErrorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { // no need to delete m_child_ptr - it's kept alive by the cluster manager on // our behalf - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { if (m_child_ptr) return 1; if (m_child_sp) diff --git a/lldb/source/Plugins/Language/ObjC/NSException.cpp b/lldb/source/Plugins/Language/ObjC/NSException.cpp index e8011e5d2ca0be..e47939c718010d 100644 --- a/lldb/source/Plugins/Language/ObjC/NSException.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSException.cpp @@ -123,7 +123,7 @@ class NSExceptionSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSExceptionSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { return 4; } diff --git 
a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp index 69e6ab1055d8c6..45c020f8d1b359 100644 --- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp @@ -40,7 +40,7 @@ class NSIndexPathSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSIndexPathSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { return m_impl.GetNumIndexes(); } + uint32_t CalculateNumChildren() override { return m_impl.GetNumIndexes(); } lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return m_impl.GetIndexAtIndex(idx, m_uint_star_type); diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index ede64852d9a879..97f19c1ac937d4 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -46,7 +46,7 @@ class NSSetISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSSetISyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -84,7 +84,7 @@ class NSCFSetSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSCFSetSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -117,7 +117,7 @@ class GenericNSSetMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSSetMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -233,7 +233,7 @@ class NSSetCodeRunningSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSSetCodeRunningSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; 
lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; @@ -419,7 +419,7 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::GetIndexOfChildWithName( return idx; } -size_t +uint32_t lldb_private::formatters::NSSetISyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -555,7 +555,7 @@ lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetIndexOfChildWithName( return idx; } -size_t +uint32_t lldb_private::formatters::NSCFSetSyntheticFrontEnd::CalculateNumChildren() { if (!m_hashtable.IsValid()) return 0; @@ -696,7 +696,7 @@ lldb_private::formatters:: } template -size_t +uint32_t lldb_private::formatters:: GenericNSSetMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) From e710523e408ce64c15fddf9f7dbe1248795c20d7 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 4 Mar 2024 17:50:47 -0800 Subject: [PATCH 072/158] Change GetChildAtIndex to take a uint32_t --- lldb/include/lldb/Core/ValueObject.h | 4 ++-- .../lldb/Core/ValueObjectSyntheticFilter.h | 2 +- .../lldb/DataFormatters/TypeSynthetic.h | 8 +++---- .../lldb/DataFormatters/VectorIterator.h | 2 +- lldb/source/Core/ValueObject.cpp | 2 +- .../Core/ValueObjectSyntheticFilter.cpp | 14 +++++------ lldb/source/DataFormatters/TypeSynthetic.cpp | 2 +- lldb/source/DataFormatters/VectorType.cpp | 2 +- .../Language/CPlusPlus/BlockPointer.cpp | 2 +- .../Plugins/Language/CPlusPlus/Coroutines.cpp | 2 +- .../Plugins/Language/CPlusPlus/Coroutines.h | 2 +- .../Language/CPlusPlus/GenericBitset.cpp | 4 ++-- .../Language/CPlusPlus/GenericOptional.cpp | 4 ++-- .../Plugins/Language/CPlusPlus/LibCxx.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibCxx.h | 8 +++---- .../Language/CPlusPlus/LibCxxAtomic.cpp | 4 ++-- .../CPlusPlus/LibCxxInitializerList.cpp | 4 ++-- .../Plugins/Language/CPlusPlus/LibCxxList.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibCxxMap.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxQueue.cpp | 2 +- .../CPlusPlus/LibCxxRangesRefView.cpp | 2 +- 
.../Plugins/Language/CPlusPlus/LibCxxSpan.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxTuple.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxUnorderedMap.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxValarray.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxVariant.cpp | 4 ++-- .../Language/CPlusPlus/LibCxxVector.cpp | 8 +++---- .../Plugins/Language/CPlusPlus/LibStdcpp.cpp | 10 ++++---- .../Language/CPlusPlus/LibStdcppTuple.cpp | 4 ++-- .../CPlusPlus/LibStdcppUniquePointer.cpp | 4 ++-- lldb/source/Plugins/Language/ObjC/Cocoa.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSArray.cpp | 16 ++++++------- .../Plugins/Language/ObjC/NSDictionary.cpp | 24 +++++++++---------- lldb/source/Plugins/Language/ObjC/NSError.cpp | 2 +- .../Plugins/Language/ObjC/NSException.cpp | 2 +- .../Plugins/Language/ObjC/NSIndexPath.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSSet.cpp | 15 ++++++------ 37 files changed, 100 insertions(+), 99 deletions(-) diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 05dd64f5634fda..757ae0601c2466 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -465,7 +465,7 @@ class ValueObject { /// Returns a unique id for this ValueObject. lldb::user_id_t GetID() const { return m_id.GetID(); } - virtual lldb::ValueObjectSP GetChildAtIndex(size_t idx, + virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx, bool can_create = true); // The method always creates missing children in the path, if necessary. @@ -791,7 +791,7 @@ class ValueObject { return (m_children.find(idx) != m_children.end()); } - ValueObject *GetChildAtIndex(size_t idx) { + ValueObject *GetChildAtIndex(uint32_t idx) { std::lock_guard guard(m_mutex); const auto iter = m_children.find(idx); return ((iter == m_children.end()) ? 
nullptr : iter->second); diff --git a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h index 57794072ff9229..1e54babc94f395 100644 --- a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h +++ b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h @@ -51,7 +51,7 @@ class ValueObjectSynthetic : public ValueObject { lldb::ValueType GetValueType() const override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx, + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx, bool can_create = true) override; lldb::ValueObjectSP GetChildMemberWithName(llvm::StringRef name, diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h index 7bb011c1579449..38f3ce0fa5f011 100644 --- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h +++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h @@ -45,7 +45,7 @@ class SyntheticChildrenFrontEnd { return count <= max ? count : max; } - virtual lldb::ValueObjectSP GetChildAtIndex(size_t idx) = 0; + virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) = 0; virtual size_t GetIndexOfChildWithName(ConstString name) = 0; @@ -111,7 +111,7 @@ class SyntheticValueProviderFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return nullptr; } + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return nullptr; } size_t GetIndexOfChildWithName(ConstString name) override { return UINT32_MAX; @@ -324,7 +324,7 @@ class TypeFilterImpl : public SyntheticChildren { uint32_t CalculateNumChildren() override { return filter->GetCount(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx >= filter->GetCount()) return lldb::ValueObjectSP(); return m_backend.GetSyntheticExpressionPathChild( @@ -430,7 +430,7 @@ class ScriptedSyntheticChildren : public 
SyntheticChildren { uint32_t CalculateNumChildren(uint32_t max) override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/include/lldb/DataFormatters/VectorIterator.h b/lldb/include/lldb/DataFormatters/VectorIterator.h index 88500b0bfdd400..7711b9de95dba8 100644 --- a/lldb/include/lldb/DataFormatters/VectorIterator.h +++ b/lldb/include/lldb/DataFormatters/VectorIterator.h @@ -26,7 +26,7 @@ class VectorIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp index 840b100c70ddaa..dcf7ccadef8801 100644 --- a/lldb/source/Core/ValueObject.cpp +++ b/lldb/source/Core/ValueObject.cpp @@ -372,7 +372,7 @@ bool ValueObject::IsLogicalTrue(Status &error) { return ret; } -ValueObjectSP ValueObject::GetChildAtIndex(size_t idx, bool can_create) { +ValueObjectSP ValueObject::GetChildAtIndex(uint32_t idx, bool can_create) { ValueObjectSP child_sp; // We may need to update our value if we are dynamic if (IsPossibleDynamicType()) diff --git a/lldb/source/Core/ValueObjectSyntheticFilter.cpp b/lldb/source/Core/ValueObjectSyntheticFilter.cpp index b03bd9a80e506e..7f8a9a34cb35df 100644 --- a/lldb/source/Core/ValueObjectSyntheticFilter.cpp +++ b/lldb/source/Core/ValueObjectSyntheticFilter.cpp @@ -33,7 +33,7 @@ class DummySyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override { return m_backend.GetNumChildren(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_backend.GetChildAtIndex(idx); } @@ -236,13 +236,13 @@ bool 
ValueObjectSynthetic::UpdateValue() { return true; } -lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, +lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(uint32_t idx, bool can_create) { Log *log = GetLog(LLDBLog::DataFormatters); LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, retrieving " - "child at index %zu", + "child at index %u", GetName().AsCString(), idx); UpdateValueIfNeeded(); @@ -261,7 +261,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, if (can_create && m_synth_filter_up != nullptr) { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu not cached and will be created", + "index %u not cached and will be created", GetName().AsCString(), idx); lldb::ValueObjectSP synth_guy = m_synth_filter_up->GetChildAtIndex(idx); @@ -269,7 +269,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, LLDB_LOGF( log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at index " - "%zu created as %p (is " + "%u created as %p (is " "synthetic: %s)", GetName().AsCString(), idx, static_cast(synth_guy.get()), synth_guy.get() @@ -291,7 +291,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, } else { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu not cached and cannot " + "index %u not cached and cannot " "be created (can_create = %s, synth_filter = %p)", GetName().AsCString(), idx, can_create ? 
"yes" : "no", static_cast(m_synth_filter_up.get())); @@ -301,7 +301,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, } else { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu cached as %p", + "index %u cached as %p", GetName().AsCString(), idx, static_cast(valobj)); return valobj->GetSP(); diff --git a/lldb/source/DataFormatters/TypeSynthetic.cpp b/lldb/source/DataFormatters/TypeSynthetic.cpp index a05fbe9a73c263..0ae38c4d31f26b 100644 --- a/lldb/source/DataFormatters/TypeSynthetic.cpp +++ b/lldb/source/DataFormatters/TypeSynthetic.cpp @@ -167,7 +167,7 @@ ScriptedSyntheticChildren::FrontEnd::FrontEnd(std::string pclass, ScriptedSyntheticChildren::FrontEnd::~FrontEnd() = default; lldb::ValueObjectSP -ScriptedSyntheticChildren::FrontEnd::GetChildAtIndex(size_t idx) { +ScriptedSyntheticChildren::FrontEnd::GetChildAtIndex(uint32_t idx) { if (!m_wrapper_sp || !m_interpreter) return lldb::ValueObjectSP(); diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp index 6f1f0e5b058fdf..a0626a8cba7788 100644 --- a/lldb/source/DataFormatters/VectorType.cpp +++ b/lldb/source/DataFormatters/VectorType.cpp @@ -226,7 +226,7 @@ class VectorTypeSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override { return m_num_children; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx >= CalculateNumChildren()) return {}; std::optional size = m_child_type.GetByteSize(nullptr); diff --git a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp index f8e47efa7d9164..ef0f67d1e9f9e7 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp @@ -79,7 +79,7 @@ class BlockPointerSyntheticFrontEnd : public SyntheticChildrenFrontEnd { return 
m_block_struct_type.GetNumChildren(omit_empty_base_classes, nullptr); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (!m_block_struct_type.IsValid()) { return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp index ea1218350f6c77..3827f9c21effab 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp @@ -113,7 +113,7 @@ uint32_t lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - StdlibCoroutineHandleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + StdlibCoroutineHandleSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { switch (idx) { case 0: return m_resume_ptr_sp; diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h index 8ec2c14d2e9e96..5c6a80b57ff424 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h @@ -36,7 +36,7 @@ class StdlibCoroutineHandleSyntheticFrontEnd uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index c8fc3af4a9d4a2..6a9da1d17c7620 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -35,7 +35,7 @@ class GenericBitsetFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; uint32_t CalculateNumChildren() override { return m_elements.size(); } - ValueObjectSP 
GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: llvm::StringRef GetDataContainerMemberName(); @@ -97,7 +97,7 @@ lldb::ChildCacheState GenericBitsetFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP GenericBitsetFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP GenericBitsetFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_elements.size() || !m_first) return ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp index 34ae1a24892136..c06afb53eb8aad 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp @@ -43,7 +43,7 @@ class GenericOptionalFrontend : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } uint32_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } - ValueObjectSP GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; private: @@ -81,7 +81,7 @@ lldb::ChildCacheState GenericOptionalFrontend::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP GenericOptionalFrontend::GetChildAtIndex(size_t _idx) { +ValueObjectSP GenericOptionalFrontend::GetChildAtIndex(uint32_t _idx) { if (!m_has_value) return ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index 710b235ce957f2..bba887fec3ac3f 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -358,7 +358,7 @@ uint32_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (m_pair_ptr) return 
m_pair_ptr->GetChildAtIndex(idx); if (m_pair_sp) @@ -515,7 +515,7 @@ uint32_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (m_pair_sp) return m_pair_sp->GetChildAtIndex(idx); return lldb::ValueObjectSP(); @@ -573,7 +573,7 @@ uint32_t lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_cntrl) return lldb::ValueObjectSP(); @@ -670,7 +670,7 @@ uint32_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_value_ptr_sp) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index af4f313a905093..ad2f58508ab7b6 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -89,7 +89,7 @@ class LibCxxMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -137,7 +137,7 @@ class LibCxxUnorderedMapIteratorSyntheticFrontEnd uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -168,7 +168,7 @@ class LibcxxSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t 
idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -188,7 +188,7 @@ class LibcxxUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp index 484d756a300e30..8e4c36103a744d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp @@ -92,7 +92,7 @@ class LibcxxStdAtomicSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -131,7 +131,7 @@ uint32_t lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx == 0) return m_real_child->GetSP()->Clone(ConstString("Value")); return nullptr; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp index ef94815b3afc6d..00012dfc056e47 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp @@ -28,7 +28,7 @@ class LibcxxInitializerListSyntheticFrontEnd uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -69,7 +69,7 @@ uint32_t 
lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - LibcxxInitializerListSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibcxxInitializerListSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (!m_start) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp index d05a64b1eba569..17f6b737d9f628 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp @@ -137,7 +137,7 @@ class ForwardListFrontEnd : public AbstractListFrontEnd { ForwardListFrontEnd(ValueObject &valobj); uint32_t CalculateNumChildren() override; - ValueObjectSP GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; }; @@ -149,7 +149,7 @@ class ListFrontEnd : public AbstractListFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -253,7 +253,7 @@ uint32_t ForwardListFrontEnd::CalculateNumChildren() { return m_count; } -ValueObjectSP ForwardListFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP ForwardListFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return nullptr; @@ -343,7 +343,7 @@ uint32_t ListFrontEnd::CalculateNumChildren() { } } -lldb::ValueObjectSP ListFrontEnd::GetChildAtIndex(size_t idx) { +lldb::ValueObjectSP ListFrontEnd::GetChildAtIndex(uint32_t idx) { static ConstString g_value("__value_"); static ConstString g_next("__next_"); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index 28a9a26c9d2db2..6d24eb03779ca3 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ 
b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -179,7 +179,7 @@ class LibcxxStdMapSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -308,7 +308,7 @@ void lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetValueOffset( lldb::ValueObjectSP lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { static ConstString g_cc_("__cc_"), g_cc("__cc"); static ConstString g_nc("__nc"); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp index beab453b788adf..fbadee89b7b7f2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp @@ -32,7 +32,7 @@ class QueueFrontEnd : public SyntheticChildrenFrontEnd { return m_container_sp ? m_container_sp->GetNumChildren() : 0; } - ValueObjectSP GetChildAtIndex(size_t idx) override { + ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_container_sp ? 
m_container_sp->GetChildAtIndex(idx) : nullptr; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp index cda6c5d79c3190..74f54f76735667 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp @@ -32,7 +32,7 @@ class LibcxxStdRangesRefViewSyntheticFrontEnd return 1; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { // Since we only have a single child, return it assert(idx == 0); return m_range_sp; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp index 5aaf1ec2d56e3d..af2b51d2b54016 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp @@ -29,7 +29,7 @@ class LibcxxStdSpanSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; /// Determines properties of the std::span<> associated with this object // @@ -80,7 +80,7 @@ uint32_t lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start) return {}; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp index 5c42d8551d4fe2..62bb7d619267a3 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp @@ -27,7 +27,7 @@ class TupleFrontEnd: public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; uint32_t 
CalculateNumChildren() override { return m_elements.size(); } - ValueObjectSP GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: // The lifetime of a ValueObject and all its derivative ValueObjects @@ -58,7 +58,7 @@ lldb::ChildCacheState TupleFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP TupleFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP TupleFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_elements.size()) return ValueObjectSP(); if (!m_base) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 0be73a9e633d9a..b3c36429433570 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -35,7 +35,7 @@ class LibcxxStdUnorderedMapSyntheticFrontEnd uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -93,7 +93,7 @@ static bool isUnorderedMap(ConstString type_name) { } lldb::ValueObjectSP lldb_private::formatters:: - LibcxxStdUnorderedMapSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibcxxStdUnorderedMapSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); if (m_tree == nullptr) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp index eae7711c8cd9f8..463c7b8d7ce3bb 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp @@ -26,7 +26,7 @@ class LibcxxStdValarraySyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + 
lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -84,7 +84,7 @@ uint32_t lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start || !m_finish) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp index c704e1b8335eac..1f62062f09be30 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp @@ -206,7 +206,7 @@ class VariantFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; uint32_t CalculateNumChildren() override { return m_size; } - ValueObjectSP GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: size_t m_size = 0; @@ -233,7 +233,7 @@ lldb::ChildCacheState VariantFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP VariantFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP VariantFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_size) return {}; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp index 62ff76b82a2a00..fcf727ad2ea027 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp @@ -27,7 +27,7 @@ class LibcxxStdVectorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -48,7 +48,7 @@ class LibcxxVectorBoolSyntheticFrontEnd : public 
SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -103,7 +103,7 @@ uint32_t lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start || !m_finish) return lldb::ValueObjectSP(); @@ -172,7 +172,7 @@ uint32_t lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { auto iter = m_children.find(idx), end = m_children.end(); if (iter != end) return iter->second; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp index b2249d23961f6d..5abb3d50674bc5 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp @@ -45,7 +45,7 @@ class LibstdcppMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -66,7 +66,7 @@ class LibStdcppSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -137,7 +137,7 @@ uint32_t LibstdcppMapIteratorSyntheticFrontEnd::CalculateNumChildren() { } lldb::ValueObjectSP -LibstdcppMapIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibstdcppMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (m_pair_address != 0 
&& m_pair_type) { if (!m_pair_sp) m_pair_sp = CreateValueObjectFromAddress("pair", m_pair_address, @@ -222,7 +222,7 @@ lldb::ChildCacheState VectorIteratorSyntheticFrontEnd::Update() { uint32_t VectorIteratorSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP -VectorIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +VectorIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0) return m_item_sp; return lldb::ValueObjectSP(); @@ -374,7 +374,7 @@ LibStdcppSharedPtrSyntheticFrontEnd::LibStdcppSharedPtrSyntheticFrontEnd( uint32_t LibStdcppSharedPtrSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP -LibStdcppSharedPtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppSharedPtrSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0) return m_ptr_obj->GetSP(); if (idx == 1) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp index ba3116d01a72b8..64d2ec9d943a2a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp @@ -28,7 +28,7 @@ class LibStdcppTupleSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -89,7 +89,7 @@ lldb::ChildCacheState LibStdcppTupleSyntheticFrontEnd::Update() { bool LibStdcppTupleSyntheticFrontEnd::MightHaveChildren() { return true; } lldb::ValueObjectSP -LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx < m_members.size() && m_members[idx]) return m_members[idx]->GetSP(); return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp 
b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp index aca18ee694c0e2..3a48fe412e0721 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp @@ -28,7 +28,7 @@ class LibStdcppUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -116,7 +116,7 @@ lldb::ChildCacheState LibStdcppUniquePtrSyntheticFrontEnd::Update() { bool LibStdcppUniquePtrSyntheticFrontEnd::MightHaveChildren() { return true; } lldb::ValueObjectSP -LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0 && m_ptr_obj) return m_ptr_obj->GetSP(); if (idx == 1 && m_del_obj) diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp index 05b324a30f2d67..cb740f8e71e168 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp @@ -1040,7 +1040,7 @@ class ObjCClassSyntheticChildrenFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index 9ee12e50b8ace9..7f060b2613d6f3 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -52,7 +52,7 @@ class NSArrayMSyntheticFrontEndBase : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP 
GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override = 0; @@ -216,7 +216,7 @@ class GenericNSArrayISyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -304,7 +304,7 @@ class NSArray0SyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -321,7 +321,7 @@ class NSArray1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -484,7 +484,7 @@ uint32_t lldb_private::formatters::NSArrayMSyntheticFrontEndBase:: lldb::ValueObjectSP lldb_private::formatters::NSArrayMSyntheticFrontEndBase::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); lldb::addr_t object_at_idx = GetDataAddress(); @@ -684,7 +684,7 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: template lldb::ValueObjectSP lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: - GetChildAtIndex(size_t idx) { + GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); lldb::addr_t object_at_idx; @@ -735,7 +735,7 @@ bool lldb_private::formatters::NSArray0SyntheticFrontEnd::MightHaveChildren() { lldb::ValueObjectSP lldb_private::formatters::NSArray0SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { return lldb::ValueObjectSP(); } @@ -770,7 +770,7 @@ bool lldb_private::formatters::NSArray1SyntheticFrontEnd::MightHaveChildren() { 
lldb::ValueObjectSP lldb_private::formatters::NSArray1SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { static const ConstString g_zero("[0]"); if (idx == 0) { diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 3bb2e4fac02e41..da94eda1529ce1 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -105,7 +105,7 @@ class NSDictionaryISyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -146,7 +146,7 @@ class NSConstantDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -178,7 +178,7 @@ class NSCFDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -211,7 +211,7 @@ class NSDictionary1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -232,7 +232,7 @@ class GenericNSDictionaryMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -265,7 +265,7 @@ namespace 
Foundation1100 { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -655,7 +655,7 @@ bool lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSDictionaryISyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -777,7 +777,7 @@ bool lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { lldb::addr_t m_keys_ptr = m_hashtable.GetKeyPointer(); lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer(); @@ -920,7 +920,7 @@ bool lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - NSConstantDictionarySyntheticFrontEnd::GetChildAtIndex(size_t idx) { + NSConstantDictionarySyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -1012,7 +1012,7 @@ bool lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSDictionary1SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx != 0) return lldb::ValueObjectSP(); @@ -1140,7 +1140,7 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd:: template lldb::ValueObjectSP lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< - D32, D64>::GetChildAtIndex(size_t idx) { + D32, D64>::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_keys_ptr; lldb::addr_t m_values_ptr; if (m_data_32) { @@ -1300,7 +1300,7 @@ lldb_private::formatters::Foundation1100:: lldb::ValueObjectSP lldb_private::formatters::Foundation1100:: - NSDictionaryMSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + 
NSDictionaryMSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_keys_ptr = (m_data_32 ? m_data_32->_keys_addr : m_data_64->_keys_addr); lldb::addr_t m_values_ptr = diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp index 786d2bc51871a5..b034e799b716e7 100644 --- a/lldb/source/Plugins/Language/ObjC/NSError.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp @@ -124,7 +124,7 @@ class NSErrorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx != 0) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/ObjC/NSException.cpp b/lldb/source/Plugins/Language/ObjC/NSException.cpp index e47939c718010d..09d3a1b42b747f 100644 --- a/lldb/source/Plugins/Language/ObjC/NSException.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSException.cpp @@ -127,7 +127,7 @@ class NSExceptionSyntheticFrontEnd : public SyntheticChildrenFrontEnd { return 4; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { switch (idx) { case 0: return m_name_sp; case 1: return m_reason_sp; diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp index 45c020f8d1b359..10bb907c58ed42 100644 --- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp @@ -42,7 +42,7 @@ class NSIndexPathSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override { return m_impl.GetNumIndexes(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_impl.GetIndexAtIndex(idx, m_uint_star_type); } diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index 
97f19c1ac937d4..c965a2a1340030 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -48,7 +48,7 @@ class NSSetISyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -86,7 +86,7 @@ class NSCFSetSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -119,7 +119,7 @@ class GenericNSSetMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -235,7 +235,7 @@ class NSSetCodeRunningSyntheticFrontEnd : public SyntheticChildrenFrontEnd { uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -466,7 +466,8 @@ bool lldb_private::formatters::NSSetISyntheticFrontEnd::MightHaveChildren() { } lldb::ValueObjectSP -lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex(size_t idx) { +lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex( + uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -587,7 +588,7 @@ bool lldb_private::formatters::NSCFSetSyntheticFrontEnd::MightHaveChildren() { lldb::ValueObjectSP lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer(); const uint32_t num_children = 
CalculateNumChildren(); @@ -748,7 +749,7 @@ lldb_private::formatters:: template lldb::ValueObjectSP lldb_private::formatters:: - GenericNSSetMSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + GenericNSSetMSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_objs_addr = (m_data_32 ? m_data_32->_objs_addr : m_data_64->_objs_addr); From 6f299417769ade1635c91f974a8745e237cc9adf Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 7 Mar 2024 18:57:09 +0000 Subject: [PATCH 073/158] [TBAA] Add extra tests to copy structs with union members. Adds extra test coverage for TBAA generation for copies of structs with union members. --- clang/test/CodeGen/tbaa-struct.cpp | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/clang/test/CodeGen/tbaa-struct.cpp b/clang/test/CodeGen/tbaa-struct.cpp index 883c982be26c8f..63e4097946448e 100644 --- a/clang/test/CodeGen/tbaa-struct.cpp +++ b/clang/test/CodeGen/tbaa-struct.cpp @@ -151,6 +151,38 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { *a1 = *a2; } +union U2 { + double d; + float f; +}; + +struct UnionMember1 { + U2 u; + int p; +}; + +void copy11(UnionMember1 *a1, UnionMember1 *a2) { +// CHECK-LABEL: _Z6copy11P12UnionMember1S0_ +// CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), +// CHECK-OLD-SAME: !tbaa.struct [[TS9:!.*]] +// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember1:!.+]], !tbaa.struct + *a1 = *a2; +} + +struct UnionMember2 { + int p; + U2 u; +}; + +void copy12(UnionMember2 *a1, UnionMember2 *a2) { +// CHECK-LABEL: _Z6copy12P12UnionMember2S0_ +// CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), +// CHECK-OLD-SAME: !tbaa.struct [[TS10:!.*]] +// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember2:!.+]], !tbaa.struct + + *a1 = *a2; +} + // CHECK-OLD: 
[[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}} // CHECK-OLD: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}} // CHECK-OLD: [[TAG_INT:!.*]] = !{[[INT:!.*]], [[INT]], i64 0} @@ -167,6 +199,10 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { // CHECK-OLD [[DOUBLE]] = !{!"double", [[CHAR]], i64 0} // CHECK-OLD: [[TS7]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]], i64 3, i64 1, [[TAG_CHAR]], i64 4, i64 1, [[TAG_CHAR]], i64 8, i64 8, [[TAG_DOUBLE]], i64 16, i64 1, [[TAG_CHAR]]} // CHECK-OLD: [[TS8]] = !{i64 0, i64 4, [[TAG_CHAR]], i64 8, i64 8, [[TAG_DOUBLE]]} +// CHECK-OLD: [[TS9]] = !{i64 0, i64 8, [[TAG_DOUBLE]], i64 0, i64 4, [[TAG_FLOAT:!.+]], i64 8, i64 4, [[TAG_INT]]} +// CHECK-OLD: [[TAG_FLOAT]] = !{[[FLOAT:!.+]], [[FLOAT]], i64 0} +// CHECK-OLD: [[FLOAT]] = !{!"float", [[CHAR]], i64 0} +// CHECK-OLD: [[TS10]] = !{i64 0, i64 4, [[TAG_INT]], i64 8, i64 8, [[TAG_DOUBLE]], i64 8, i64 4, [[TAG_FLOAT:!.+]]} // CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"} // CHECK-NEW-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0} @@ -188,3 +224,7 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { // CHECK-NEW-DAG: [[TYPE_NamedBitfields2]] = !{[[TYPE_char]], i64 24, !"_ZTS15NamedBitfields2", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1, [[TYPE_int]], i64 3, i64 4, [[TYPE_int]], i64 3, i64 4, [[TYPE_char]], i64 4, i64 1, [[TYPE_double]], i64 8, i64 8, [[TYPE_int]], i64 16, i64 4} // CHECK-NEW-DAG: [[TAG_NamedBitfields3]] = !{[[TYPE_NamedBitfields3:!.+]], [[TYPE_NamedBitfields3]], i64 0, i64 16} // CHECK-NEW-DAG: [[TYPE_NamedBitfields3]] = !{[[TYPE_char]], i64 16, !"_ZTS15NamedBitfields3", [[TYPE_int]], i64 1, i64 4, [[TYPE_int]], i64 2, i64 4, [[TYPE_double]], i64 8, i64 8} +// CHECK-NEW-DAG: [[TAG_UnionMember1]] = !{[[TYPE_UnionMember1:!.+]], [[TYPE_UnionMember1]], i64 0, i64 16} +// 
CHECK-NEW-DAG: [[TYPE_UnionMember1]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember1", [[TYPE_char]], i64 0, i64 8, [[TYPE_int]], i64 8, i64 4} // CHECK-NEW-DAG: [[TAG_UnionMember2]] = !{[[TYPE_UnionMember2:!.+]], [[TYPE_UnionMember2]], i64 0, i64 16} // CHECK-NEW-DAG: [[TYPE_UnionMember2]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember2", [[TYPE_int]], i64 0, i64 4, [[TYPE_char]], i64 8, i64 8} From 7fc583c9a5ddf447b2b53007778cb034a186d4b5 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 5 Mar 2024 16:29:00 -0800 Subject: [PATCH 074/158] Change Get|SetNumChildren to use uint32_t --- lldb/include/lldb/Core/ValueObject.h | 4 ++-- lldb/source/Core/ValueObject.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 757ae0601c2466..b4d2c8098edc71 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -476,7 +476,7 @@ class ValueObject { virtual size_t GetIndexOfChildWithName(llvm::StringRef name); - size_t GetNumChildren(uint32_t max = UINT32_MAX); + uint32_t GetNumChildren(uint32_t max = UINT32_MAX); const Value &GetValue() const { return m_value; } @@ -960,7 +960,7 @@ class ValueObject { /// Should only be called by ValueObject::GetNumChildren(). 
virtual uint32_t CalculateNumChildren(uint32_t max = UINT32_MAX) = 0; - void SetNumChildren(size_t num_children); + void SetNumChildren(uint32_t num_children); void SetValueDidChange(bool value_changed) { m_flags.m_value_did_change = value_changed; diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp index dcf7ccadef8801..0ed7f03be25c16 100644 --- a/lldb/source/Core/ValueObject.cpp +++ b/lldb/source/Core/ValueObject.cpp @@ -440,7 +440,7 @@ ValueObjectSP ValueObject::GetChildMemberWithName(llvm::StringRef name, return child_sp; } -size_t ValueObject::GetNumChildren(uint32_t max) { +uint32_t ValueObject::GetNumChildren(uint32_t max) { UpdateValueIfNeeded(); if (max < UINT32_MAX) { @@ -470,7 +470,7 @@ bool ValueObject::MightHaveChildren() { } // Should only be called by ValueObject::GetNumChildren() -void ValueObject::SetNumChildren(size_t num_children) { +void ValueObject::SetNumChildren(uint32_t num_children) { m_flags.m_children_count_valid = true; m_children.SetChildrenCount(num_children); } From c103d573e7fc236c0c9e2fde41a843ea62d960f4 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 7 Mar 2024 20:00:05 +0100 Subject: [PATCH 075/158] [libc] Fix forward missing `BigInt` specialization of `mask_leading_ones` / `mask_trailing_ones` (#84325) #84299 broke the arm32 build, this patch fixes it forward. 
--- libc/src/__support/UInt.h | 56 +++++++++++++++ libc/src/__support/math_extras.h | 13 ++-- .../stdio/printf_core/float_dec_converter.h | 7 +- libc/test/UnitTest/CMakeLists.txt | 1 + libc/test/UnitTest/StringUtils.h | 3 +- libc/test/src/__support/CMakeLists.txt | 2 + libc/test/src/__support/math_extras_test.cpp | 69 +++++++++++++------ .../libc/test/src/__support/BUILD.bazel | 6 +- 8 files changed, 123 insertions(+), 34 deletions(-) diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index b3d8f00b9a01a5..925de8764715da 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -1056,4 +1056,60 @@ rotr(T value, int rotate) { } // namespace LIBC_NAMESPACE::cpp +namespace LIBC_NAMESPACE { + +// Specialization of mask_trailing_ones ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_ones() { + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast(count / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else + word = mask_trailing_ones(); + ++index; + } + return out; +} + +// Specialization of mask_leading_ones ('math_extras.h') for BigInt. 
+template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_ones() { + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast((T::BITS - count - 1ULL) / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else + word = mask_leading_ones(); + ++index; + } + return out; +} + +} // namespace LIBC_NAMESPACE + #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 7a89fbb11b2a9e..c6b458ddecdabf 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -20,21 +20,18 @@ namespace LIBC_NAMESPACE { // Create a bitmask with the count right-most bits set to 1, and all other bits // set to 0. Only unsigned types are allowed. template -LIBC_INLINE constexpr T mask_trailing_ones() { - static_assert(cpp::is_unsigned_v); +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_ones() { constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); static_assert(count <= T_BITS && "Invalid bit index"); - // It's important not to initialize T with -1, since T may be BigInt which - // will take -1 as a uint64_t and only initialize the low 64 bits. - constexpr T ALL_ZEROES(0); - constexpr T ALL_ONES(~ALL_ZEROES); // bitwise NOT performs integer promotion. - return count == 0 ? 0 : (ALL_ONES >> (T_BITS - count)); + return count == 0 ? 0 : (T(-1) >> (T_BITS - count)); } // Create a bitmask with the count left-most bits set to 1, and all other bits // set to 0. Only unsigned types are allowed. 
template -LIBC_INLINE constexpr T mask_leading_ones() { +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_ones() { constexpr T MASK(mask_trailing_ones()); return T(~MASK); // bitwise NOT performs integer promotion. } diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index a6c68329e66023..27d229a3e42cb5 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/UInt.h" // cpp::is_big_int_v #include "src/__support/float_to_string.h" #include "src/__support/integer_to_string.h" #include "src/__support/libc_assert.h" @@ -33,7 +34,8 @@ using ExponentString = // Returns true if value is divisible by 2^p. template -LIBC_INLINE constexpr cpp::enable_if_t, bool> +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v || cpp::is_big_int_v, bool> multiple_of_power_of_2(T value, uint32_t p) { return (value & ((T(1) << p) - 1)) == 0; } @@ -76,7 +78,8 @@ LIBC_INLINE RoundDirection get_round_direction(int last_digit, bool truncated, } template -LIBC_INLINE constexpr cpp::enable_if_t, bool> +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v || cpp::is_big_int_v, bool> zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa, const int32_t mant_width) { const int32_t required_twos = -base_2_exp - digits_after_point - 1; diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 36837c553efce1..8a35f1204eb511 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -104,6 +104,7 @@ add_header_library( DEPENDS libc.src.__support.CPP.string libc.src.__support.CPP.type_traits + libc.src.__support.uint ) add_unittest_framework_library( diff --git a/libc/test/UnitTest/StringUtils.h 
b/libc/test/UnitTest/StringUtils.h index 54cff97ceafb4e..1e3ba5715d23d6 100644 --- a/libc/test/UnitTest/StringUtils.h +++ b/libc/test/UnitTest/StringUtils.h @@ -11,12 +11,13 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/UInt.h" namespace LIBC_NAMESPACE { // Return the first N hex digits of an integer as a string in upper case. template -cpp::enable_if_t, cpp::string> +cpp::enable_if_t || cpp::is_big_int_v, cpp::string> int_to_hex(T value, size_t length = sizeof(T) * 2) { cpp::string s(length, '0'); diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 8c861b576f9b1b..adbacb9728ccd4 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -27,7 +27,9 @@ add_libc_test( SRCS math_extras_test.cpp DEPENDS + libc.src.__support.integer_literals libc.src.__support.math_extras + libc.src.__support.uint128 ) add_libc_test( diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index e55d995592cc1c..ed064363d446bb 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -6,34 +6,59 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/UInt128.h" // UInt128 +#include "src/__support/integer_literals.h" #include "src/__support/math_extras.h" #include "test/UnitTest/Test.h" namespace LIBC_NAMESPACE { TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) { - EXPECT_EQ(uint8_t(0), (mask_leading_ones())); - EXPECT_EQ(uint8_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint16_t(0), (mask_leading_ones())); - EXPECT_EQ(uint16_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0), (mask_leading_ones())); - EXPECT_EQ(uint32_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0), (mask_leading_ones())); - EXPECT_EQ(uint64_t(0), (mask_trailing_ones())); - - 
EXPECT_EQ(uint32_t(0x00000003), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xC0000000), (mask_leading_ones())); - - EXPECT_EQ(uint32_t(0x000007FF), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xFFE00000), (mask_leading_ones())); - - EXPECT_EQ(uint32_t(0xFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xFFFFFFFF), (mask_leading_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFFFF), (mask_leading_ones())); - - EXPECT_EQ(uint64_t(0x0000FFFFFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFF0000), (mask_leading_ones())); + EXPECT_EQ(0_u8, (mask_leading_ones())); + EXPECT_EQ(0_u8, (mask_trailing_ones())); + EXPECT_EQ(0_u16, (mask_leading_ones())); + EXPECT_EQ(0_u16, (mask_trailing_ones())); + EXPECT_EQ(0_u32, (mask_leading_ones())); + EXPECT_EQ(0_u32, (mask_trailing_ones())); + EXPECT_EQ(0_u64, (mask_leading_ones())); + EXPECT_EQ(0_u64, (mask_trailing_ones())); + + EXPECT_EQ(0x00000003_u32, (mask_trailing_ones())); + EXPECT_EQ(0xC0000000_u32, (mask_leading_ones())); + + EXPECT_EQ(0x000007FF_u32, (mask_trailing_ones())); + EXPECT_EQ(0xFFE00000_u32, (mask_leading_ones())); + + EXPECT_EQ(0xFFFFFFFF_u32, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFF_u32, (mask_leading_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF_u64, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF_u64, (mask_leading_ones())); + + EXPECT_EQ(0x0000FFFFFFFFFFFF_u64, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFF0000_u64, (mask_leading_ones())); + + EXPECT_EQ(0_u128, (mask_trailing_ones())); + EXPECT_EQ(0_u128, (mask_leading_ones())); + + EXPECT_EQ(0x00000000000000007FFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFE0000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0x0000000000000000FFFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF0000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0x0000000000000001FFFFFFFFFFFFFFFF_u128, 
+ (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF8000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128, + (mask_leading_ones())); } } // namespace LIBC_NAMESPACE diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 8e94a84f586f4c..19d4c7869799a0 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -13,7 +13,11 @@ licenses(["notice"]) libc_test( name = "math_extras_test", srcs = ["math_extras_test.cpp"], - deps = ["//libc:__support_math_extras"], + deps = [ + "//libc:__support_integer_literals", + "//libc:__support_math_extras", + "//libc:__support_uint128", + ], ) # This test is currently disabled because of an issue in From 6e692e726a5bc4385ab64635e9c06b7574332e4d Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Thu, 7 Mar 2024 14:02:01 -0500 Subject: [PATCH 076/158] [mlir][sparse] Migrate to sparse_tensor.print (#83946) Continuous efforts following #83506. 
--- .../SparseTensor/CPU/sparse_matvec.mlir | 4 +- .../SparseTensor/CPU/sparse_mttkrp.mlir | 4 +- .../SparseTensor/CPU/sparse_out_mult_elt.mlir | 19 +++-- .../CPU/sparse_out_reduction.mlir | 29 +++---- .../SparseTensor/CPU/sparse_out_simple.mlir | 19 +++-- .../Dialect/SparseTensor/CPU/sparse_pack.mlir | 4 +- .../SparseTensor/CPU/sparse_pack_d.mlir | 53 +++++++----- .../SparseTensor/CPU/sparse_pooling_nhwc.mlir | 27 ++++-- .../CPU/sparse_quantized_matmul.mlir | 4 +- .../CPU/sparse_reduce_custom.mlir | 83 +++++++++---------- .../CPU/sparse_reduce_custom_prod.mlir | 78 +++++++++-------- .../CPU/sparse_reduce_custom_sum.mlir | 4 +- .../SparseTensor/CPU/sparse_reductions.mlir | 4 +- .../CPU/sparse_reductions_min.mlir | 4 +- .../CPU/sparse_reductions_prod.mlir | 4 +- .../SparseTensor/CPU/sparse_reshape.mlir | 53 ++++++++---- .../CPU/sparse_rewrite_push_back.mlir | 4 +- .../CPU/sparse_rewrite_sort_coo.mlir | 4 +- .../CPU/sparse_sampled_matmul.mlir | 4 +- .../CPU/sparse_sampled_mm_fusion.mlir | 34 +++++--- 20 files changed, 249 insertions(+), 190 deletions(-) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index bdad713709afa2..b9d1148301dd16 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -88,7 +88,7 @@ module { // // Main driver that reads matrix from file 
and calls the sparse kernel. // - func.func @entry() { + func.func @main() { %i0 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir index 30e620b9d610ba..d615cb66c3d08d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -84,7 +84,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %f0 = arith.constant 0.0 : f64 %cst0 = arith.constant 0 : index %cst1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir index f7bcd1122d46c1..c30c6b9b5cc2f5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -60,7 +60,7 @@ module { } // Driver method to call and verify kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -84,11 +84,18 @@ module { // // Verify results. Only two entries stored in result! // - // CHECK: ( 14, 20, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 32, 16 ) + // CHECK-NEXT: lvl = ( 32, 16 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 2, 31 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 2, 0 + // CHECK-NEXT: values : ( 14, 20 + // CHECK-NEXT: ---- // - %val = sparse_tensor.values %0 : tensor<32x16xf32, #DCSR> to memref - %vv = vector.transfer_read %val[%c0], %f0: memref, vector<4xf32> - vector.print %vv : vector<4xf32> + sparse_tensor.print %0 : tensor<32x16xf32, #DCSR> // Release the resources. 
bufferization.dealloc_tensor %sta : tensor<32x16xf32, #DCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir index 17def3f52c003e..74f0e7698bc14b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -70,10 +70,7 @@ module { } // Driver method to call and verify tensor kernel. - func.func @entry() { - %c0 = arith.constant 0 : index - %i0 = arith.constant 0 : i32 - + func.func @main() { // Setup very sparse 3-d tensors. %t1 = arith.constant sparse< [ [1,1,3], [2,0,0], [2,2,1], [2,2,2], [2,2,3] ], [ 1, 2, 3, 4, 5 ] @@ -94,23 +91,23 @@ module { // // Verify results. Only two entries stored in result. Correct structure. 
// - // CHECK: ( 7, 69, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0 ), ( 0, 7, 0 ), ( 0, 0, 69 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 3, 3 ) + // CHECK-NEXT: lvl = ( 3, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 1, 2 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 1, 2 + // CHECK-NEXT: values : ( 7, 69 + // CHECK-NEXT: ---- // - %val = sparse_tensor.values %0 - : tensor to memref - %vv = vector.transfer_read %val[%c0], %i0: memref, vector<4xi32> - vector.print %vv : vector<4xi32> - %dm = sparse_tensor.convert %0 - : tensor to tensor - %vm = vector.transfer_read %dm[%c0, %c0], %i0: tensor, vector<3x3xi32> - vector.print %vm : vector<3x3xi32> + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %st1 : tensor bufferization.dealloc_tensor %st2 : tensor bufferization.dealloc_tensor %0 : tensor - bufferization.dealloc_tensor %dm : tensor return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir index e2d8c4fd4628d9..88513c80219a85 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -70,7 +70,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index @@ -83,11 +83,18 @@ module { // Print the result for verification. // - // CHECK: ( 1, 1.96, 4, 6.25, 9, 16.81, 16, 27.04, 25 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 5, 7, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 1, 4, 2, 0, 3, 1, 4 + // CHECK-NEXT: values : ( 1, 1.96, 4, 6.25, 9, 16.81, 16, 27.04, 25 + // CHECK-NEXT: ---- // - %m = sparse_tensor.values %0 : tensor to memref - %v = vector.transfer_read %m[%c0], %d0: memref, vector<9xf64> - vector.print %v : vector<9xf64> + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %x : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir index b792d00681ddb4..7cde6b93d3250c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -54,7 +54,7 @@ module { // // Main driver. 
// - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f64 %i0 = arith.constant 0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir index 8a65e2449c1574..aa1bd04fde87dc 100755 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -48,7 +48,7 @@ module { // // Main driver. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -107,26 +107,39 @@ module { // // Verify. 
// - // CHECK: ( ( ( 1, 2 ), ( 3, 4 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 0, 0 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 5, 0 ), ( 6, 7 ) ), ( ( 0, 0 ), ( 8, 0 ), ( 0, 0 ) ) ) - // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) - // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 8 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 5 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 2, 1 + // CHECK-NEXT: pos[2] : ( 0, 2, 4, 5, 7, 8 + // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 0, 1, 0 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[2] : ( 0, 2, 3, 4, 6, 6, 7, 9, 11, 13, 14, 15, 16 + // CHECK-NEXT: crd[2] : ( 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 22 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 5, 8, 11 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 2, 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: values : ( 1, 2, 0, 3, 4, 0, 5, 6, 0, 7, 8, 9, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16 + // CHECK-NEXT: ---- // + sparse_tensor.print %s0 : tensor<4x3x2xf32, #CCC> + sparse_tensor.print %s1 : tensor<4x3x2xf32, #BatchedCSR> + sparse_tensor.print %s2 : tensor<4x3x2xf32, #CSRDense> - %d0 = sparse_tensor.convert %s0 : tensor<4x3x2xf32, #CCC> to tensor<4x3x2xf32> - %v0 = vector.transfer_read %d0[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, 
vector<4x3x2xf32> - vector.print %v0 : vector<4x3x2xf32> - - %d1 = sparse_tensor.convert %s1 : tensor<4x3x2xf32, #BatchedCSR> to tensor<4x3x2xf32> - %v1 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> - vector.print %v1 : vector<4x3x2xf32> - - %d2 = sparse_tensor.convert %s2 : tensor<4x3x2xf32, #CSRDense> to tensor<4x3x2xf32> - %v2 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> - vector.print %v2 : vector<4x3x2xf32> - - bufferization.dealloc_tensor %d0 : tensor<4x3x2xf32> - bufferization.dealloc_tensor %d1 : tensor<4x3x2xf32> - bufferization.dealloc_tensor %d2 : tensor<4x3x2xf32> // FIXME: doing this explicitly crashes runtime // bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC> // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir index 3ce089d7a7cf6b..39699fbdb14e59 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -47,7 +47,7 @@ func.func @pooling_nhwc_sum(%input: tensor<1x4x4x1xf32>, %filter: tensor<2x2xf32 } -func.func @entry() { +func.func @main() { %c0 = arith.constant 0 : index %zero = arith.constant 0.00000e+00 : f32 @@ -76,17 +76,26 @@ func.func @entry() { // // Sparse 
pooling should have the same output. // - - // CHECK-NEXT: ( ( ( ( 6 ), ( 6 ), ( 6 ) ), ( ( 6 ), ( 6 ), ( 6 ) ), ( ( 6 ), ( 6 ), ( 6 ) ) ) ) - %s1 = sparse_tensor.convert %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> to tensor<1x3x3x1xf32> - %v1 = vector.transfer_read %s1[%c0, %c0, %c0, %c0], %zero - : tensor<1x3x3x1xf32>, vector<1x3x3x1xf32> - vector.print %v1 : vector<1x3x3x1xf32> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 1, 3, 3, 1 ) + // CHECK-NEXT: lvl = ( 1, 3, 3, 1 ) + // CHECK-NEXT: pos[0] : ( 0, 1 + // CHECK-NEXT: crd[0] : ( 0 + // CHECK-NEXT: pos[1] : ( 0, 3 + // CHECK-NEXT: crd[1] : ( 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[3] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: crd[3] : ( 0, 0, 0, 0, 0, 0, 0, 0, 0 + // CHECK-NEXT: values : ( 6, 6, 6, 6, 6, 6, 6, 6, 6 + // CHECK-NEXT: ---- + // + sparse_tensor.print %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> // Releases resources. 
bufferization.dealloc_tensor %in_CCCC : tensor<1x4x4x1xf32, #CCCC> bufferization.dealloc_tensor %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> bufferization.dealloc_tensor %dense_ret : tensor<1x3x3x1xf32> - bufferization.dealloc_tensor %s1 : tensor<1x3x3x1xf32> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir index b322d965f2dc7e..873322929232a7 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -49,7 +49,7 @@ module { return %0: tensor<5x6xi32> } - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %i0 = arith.constant 0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir index 17219cde035d34..a927a5dfb94bc2 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: 
%{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -114,39 +114,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -171,19 +140,43 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 1, 2, 0, 0, 0 ), ( 3, 0, 0, 0, 0 ), ( 0, 0, 4, 5, 6 ), ( 7, 0, 8, 9, 0 ), ( 0, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 6, 0, 0, 0, 0 ), ( 0, 0, 0, 5, 0 ), ( 4, 0, 0, 3, 0 ), ( 0, 2, 0, 0, 0 ), ( 0, 11, 0, 0, 0 ) ) - // CHECK-NEXT: ( 7, 7, 9, 8, 7, 7, 12, 11, 11, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 7, 0, 0, 7, 0 ), ( 9, 0, 0, 0, 0 ), ( 8, 7, 0, 7, 0 ), ( 12, 11, 0, 11, 0 ), ( 0, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 7, 7, 9, 8, 7, 7, 12, 11, 11, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 7, 0, 0, 7, 0 ), ( 9, 0, 0, 0, 0 ), ( 8, 7, 0, 7, 0 ), ( 12, 11, 0, 11, 0 ), ( 0, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 5 ) + // CHECK-NEXT: lvl = ( 4, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 0, 2, 3, 4, 0, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 5, 6 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 4 ) + // CHECK-NEXT: lvl = ( 4, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 0, 1, 3, 0, 1, 3 + // CHECK-NEXT: values : ( 7, 7, 9, 8, 7, 7, 12, 11, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 4 ) + // CHECK-NEXT: lvl = ( 4, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 0, 1, 3, 0, 1, 3 + // CHECK-NEXT: values : ( 7, 7, 9, 8, 7, 7, 12, 11, 11 + // CHECK-NEXT: ---- // - call @dump_mat(%sm1) : (tensor) -> () - call @dump_mat(%sm2r) : 
(tensor) -> () - call @dump_mat(%5) : (tensor) -> () - call @dump_mat(%6) : (tensor) -> () + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %sm2r : tensor + sparse_tensor.print %5 : tensor + sparse_tensor.print %6 : tensor // Release the resources. bufferization.dealloc_tensor %sm1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir index 6e2c572cf21ba0..18bf6a71c53058 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -89,39 +89,9 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. 
- func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -144,15 +114,43 @@ module { // // Verify the results. // - // CHECK: ( 2, 3, 120, 504, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 3, 120, 504, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 12, 2, 11, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 12, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 5 ) + // CHECK-NEXT: lvl = ( 4, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 0, 2, 3, 4, 0, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 5, 6 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 4 ) + // CHECK-NEXT: lvl = ( 4 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: values : ( 2, 3, 120, 504 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 5 ) + // CHECK-NEXT: lvl = ( 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: 
crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: values : ( 6, 5, 12, 2, 11 + // CHECK-NEXT: ---- // - call @dump_mat(%sm1) : (tensor) -> () - call @dump_mat(%sm2r) : (tensor) -> () - call @dump_vec(%1) : (tensor) -> () - call @dump_vec(%2) : (tensor) -> () + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %sm2r : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor // Release the resources. bufferization.dealloc_tensor %sm1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir index 80c35676e804b1..8588ebd98cc918 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -145,7 +145,7 @@ module { return } - func.func @entry() { + func.func @main() { %ri = arith.constant dense<0> : tensor // Sparse vector of length 8 with 2 stored elements (and thus 6 implicit zeros). 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir index 4ad23d1c031238..96ec8bad8b5515 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -106,7 +106,7 @@ module { return } - func.func @entry() { + func.func @main() { %ri = arith.constant dense< 7 > : tensor %rf = arith.constant dense< 2.0 > : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir index 198920024221b0..16c1d7df7e89dd 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -82,7 +82,7 @@ module { return } - func.func @entry() { + func.func 
@main() { %ri = arith.constant dense<999> : tensor // Vectors with a few zeros. diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir index 7ec30787fea464..4797fbb8f5319c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -162,7 +162,7 @@ module { return } - func.func @entry() { + func.func @main() { // Note: Constants bufferize to read-only buffers. 
%ri = arith.constant dense< 7 > : tensor %rf = arith.constant dense< 2.0 > : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir index b551f9545dc436..4c26ebe6e401ba 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -63,7 +63,7 @@ module { } - func.func @entry() { + func.func @main() { %m = arith.constant dense <[ [ 1.1, 0.0, 1.3, 0.0 ], [ 2.1, 0.0, 2.3, 0.0 ], [ 3.1, 0.0, 3.3, 0.0 ]]> : tensor<3x4xf64> @@ -76,20 +76,41 @@ module { %c0 = arith.constant 0 : index %df = arith.constant -1.0 : f64 - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b0 = sparse_tensor.values %reshaped0: tensor<2x6xf64, #SparseMatrix> to memref - %v0 = vector.transfer_read %b0[%c0], %df: memref, vector<12xf64> - vector.print %v0 : vector<12xf64> - - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b1 = sparse_tensor.values %reshaped1: tensor<12xf64, #SparseVector> to memref - %v1 = vector.transfer_read %b1[%c0], %df: memref, vector<12xf64> - vector.print %v1 : vector<12xf64> - - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b2 = sparse_tensor.values %reshaped2: tensor<2x3x2xf64, #Sparse3dTensor> to memref - %v2 = vector.transfer_read %b2[%c0], %df: memref, vector<12xf64> - vector.print %v2: vector<12xf64> + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // 
CHECK-NEXT: dim = ( 2, 6 ) + // CHECK-NEXT: lvl = ( 2, 6 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 4, 0, 2, 4 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 12 ) + // CHECK-NEXT: lvl = ( 12 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 2, 4, 6, 8, 10 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 2, 3, 2 ) + // CHECK-NEXT: lvl = ( 2, 3, 2 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 1, 2, 3, 4, 5, 6 + // CHECK-NEXT: crd[2] : ( 0, 0, 0, 0, 0, 0 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // + sparse_tensor.print %reshaped0: tensor<2x6xf64, #SparseMatrix> + sparse_tensor.print %reshaped1: tensor<12xf64, #SparseVector> + sparse_tensor.print %reshaped2: tensor<2x3x2xf64, #Sparse3dTensor> bufferization.dealloc_tensor %sm : tensor<3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %reshaped0 : tensor<2x6xf64, #SparseMatrix> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir index c2e83fc61c4b53..1536249e60f286 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: 
%{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -28,7 +28,7 @@ // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %} module { - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c10 = arith.constant 10 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir index e7dd0ad32a2430..0682bc6f314fd1 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -64,7 +64,7 @@ module { } // The main driver. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : i32 %c1 = arith.constant 1 : i32 %c2 = arith.constant 2 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir index 3330d2249707fc..085b36a368704d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -81,7 +81,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir index afaf36c4072c09..20a8c5f812de9b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -169,7 +169,7 @@ module { // // Main driver. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index @@ -207,22 +207,36 @@ module { // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 192 ) ) // - // CHECK-NEXT: ( 96, 192, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 0, 7 + // CHECK-NEXT: values : ( 96, 192 + // CHECK-NEXT: ---- // - // CHECK-NEXT: ( 96, 192, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 0, 7 + // CHECK-NEXT: values : ( 96, 192 + // CHECK-NEXT: ---- // - %m2 = sparse_tensor.values %2 : tensor<8x8xf64, #SM> to memref - %m3 = sparse_tensor.values %3 : tensor<8x8xf64, #SM> to memref %v0 = vector.transfer_read %0[%c0, %c0], %d0 : tensor<8x8xf64>, vector<8x8xf64> %v1 = vector.transfer_read %1[%c0, %c0], %d0 : tensor<8x8xf64>, vector<8x8xf64> - %v2 = vector.transfer_read %m2[%c0], %d0 : memref, vector<4xf64> - %v3 = vector.transfer_read %m3[%c0], %d0 : memref, vector<4xf64> vector.print %v0 : vector<8x8xf64> vector.print %v1 : vector<8x8xf64> - vector.print %v2 : vector<4xf64> - vector.print %v3 : vector<4xf64> + sparse_tensor.print %2 : tensor<8x8xf64, #SM> + sparse_tensor.print %3 : tensor<8x8xf64, #SM> // Release the resources. bufferization.dealloc_tensor %s : tensor<8x8xf64, #SM> From 4cb5a96af646e18f9fc8c1b337299d5465f0a4d6 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Thu, 7 Mar 2024 14:02:20 -0500 Subject: [PATCH 077/158] [mlir][sparse] Migrate more tests to sparse_tensor.print (#84249) Continuous efforts following #83946. 
--- .../SparseTensor/CPU/sparse_scale.mlir | 17 +- .../SparseTensor/CPU/sparse_scf_nested.mlir | 46 ++-- .../SparseTensor/CPU/sparse_select.mlir | 83 ++++---- .../CPU/sparse_semiring_select.mlir | 26 ++- .../Dialect/SparseTensor/CPU/sparse_sign.mlir | 17 +- .../SparseTensor/CPU/sparse_sorted_coo.mlir | 200 ++++++------------ .../Dialect/SparseTensor/CPU/sparse_spmm.mlir | 4 +- .../SparseTensor/CPU/sparse_storage.mlir | 197 ++++++----------- .../CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir | 4 +- .../Dialect/SparseTensor/CPU/sparse_sum.mlir | 4 +- .../SparseTensor/CPU/sparse_sum_bf16.mlir | 4 +- .../SparseTensor/CPU/sparse_sum_c32.mlir | 4 +- .../SparseTensor/CPU/sparse_sum_f16.mlir | 4 +- .../Dialect/SparseTensor/CPU/sparse_tanh.mlir | 37 +--- .../SparseTensor/CPU/sparse_tensor_mul.mlir | 36 ++-- .../SparseTensor/CPU/sparse_tensor_ops.mlir | 44 ++-- .../SparseTensor/CPU/sparse_transpose.mlir | 45 ++-- .../CPU/sparse_transpose_coo.mlir | 36 ++-- .../SparseTensor/CPU/sparse_unary.mlir | 126 ++++++----- .../SparseTensor/CPU/sparse_vector_ops.mlir | 104 +++++---- 20 files changed, 478 insertions(+), 560 deletions(-) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir index 6ec13fd623b5cd..4e9090ae201d02 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -65,7 +65,7 @@ 
module { // and then calls the sparse scaling kernel with the sparse tensor // as input argument. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -88,11 +88,16 @@ module { // Print the resulting compacted values for verification. // - // CHECK: ( 2, 2, 2, 4, 6, 8, 2, 10, 2, 2, 12, 2, 14, 2, 2, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 4, 5, 6, 8, 11, 14, 16 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 1, 2, 3, 1, 4, 1, 2, 5, 2, 6, 7, 2, 7 + // CHECK-NEXT: values : ( 2, 2, 2, 4, 6, 8, 2, 10, 2, 2, 12, 2, 14, 2, 2, 16 + // CHECK-NEXT: ---- // - %m = sparse_tensor.values %2 : tensor<8x8xf32, #CSR> to memref - %v = vector.transfer_read %m[%c0], %f0: memref, vector<16xf32> - vector.print %v : vector<16xf32> + sparse_tensor.print %2 : tensor<8x8xf32, #CSR> // Release the resources. bufferization.dealloc_tensor %1 : tensor<8x8xf32, #CSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir index 439144fedeeb89..dd8396dc23b036 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -68,17 +68,7 @@ module @func_sparse.2 { return %1 : tensor<2x3x4xf64, #SparseMatrix> } - func.func 
@dump(%arg0: tensor<2x3x4xf64, #SparseMatrix>) { - %d0 = arith.constant 0.0 : f64 - %c0 = arith.constant 0 : index - %dm = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseMatrix> to tensor<2x3x4xf64> - %0 = vector.transfer_read %dm[%c0, %c0, %c0], %d0: tensor<2x3x4xf64>, vector<2x3x4xf64> - vector.print %0 : vector<2x3x4xf64> - bufferization.dealloc_tensor %dm : tensor<2x3x4xf64> - return - } - - func.func public @entry() { + func.func public @main() { %src = arith.constant dense<[ [ [ 1.0, 2.0, 3.0, 4.0 ], [ 5.0, 6.0, 7.0, 8.0 ], @@ -96,10 +86,34 @@ module @func_sparse.2 { %sm_t = call @condition(%t, %sm) : (i1, tensor<2x3x4xf64, #SparseMatrix>) -> tensor<2x3x4xf64, #SparseMatrix> %sm_f = call @condition(%f, %sm) : (i1, tensor<2x3x4xf64, #SparseMatrix>) -> tensor<2x3x4xf64, #SparseMatrix> - // CHECK: ( ( ( 0, 1, 2, 3 ), ( 4, 5, 6, 7 ), ( 8, 9, 10, 11 ) ), ( ( 12, 13, 14, 15 ), ( 16, 17, 18, 19 ), ( 20, 21, 22, 23 ) ) ) - // CHECK-NEXT: ( ( ( 2, 3, 4, 5 ), ( 6, 7, 8, 9 ), ( 10, 11, 12, 13 ) ), ( ( 14, 15, 16, 17 ), ( 18, 19, 20, 21 ), ( 22, 23, 24, 25 ) ) ) - call @dump(%sm_t) : (tensor<2x3x4xf64, #SparseMatrix>) -> () - call @dump(%sm_f) : (tensor<2x3x4xf64, #SparseMatrix>) -> () + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] 
: ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + // CHECK-NEXT: ---- + // + sparse_tensor.print %sm_t : tensor<2x3x4xf64, #SparseMatrix> + sparse_tensor.print %sm_f : tensor<2x3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %sm : tensor<2x3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %sm_t : tensor<2x3x4xf64, #SparseMatrix> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir index 533afb6644aeda..68bc17175e3b4b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -97,39 +97,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. 
- %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -151,19 +120,43 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, -4, 0, 5, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 0, 2, 0, -4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0, 1, 0 ), ( 0, 0, 0, 0, 2 ), ( 0, 3, 0, 4, 0 ), ( 0, 0, 0, 5, 6 ), ( 0, 0, 7, 0, 0 ) ) - // CHECK-NEXT: ( 1, 2, 5, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 0, 2, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0, 1, 0 ), ( 0, 0, 0, 0, 2 ), ( 0, 0, 0, 4, 0 ), ( 0, 0, 0, 0, 6 ), ( 0, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 10 ) + // CHECK-NEXT: lvl = ( 10 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 1, 3, 5, 7, 9 + // CHECK-NEXT: values : ( 1, 2, -4, 0, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 7 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 6, 7 + // CHECK-NEXT: crd[1] : ( 3, 4, 1, 3, 3, 4, 2 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 3 + // CHECK-NEXT: dim = ( 10 ) + // CHECK-NEXT: lvl = ( 10 ) + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 1, 3, 9 + // CHECK-NEXT: values : ( 1, 2, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 3, 4, 4 + // CHECK-NEXT: crd[1] : ( 3, 4, 3, 4 + // CHECK-NEXT: values : ( 1, 2, 4, 6 + // CHECK-NEXT: ---- // - call @dump_vec(%sv1) : (tensor) -> () - call @dump_mat(%sm1) : (tensor) -> () - call @dump_vec(%1) : (tensor) -> () - call @dump_mat(%2) : (tensor) -> () + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor // Release the resources. 
bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir index 6244be0ba7ab64..f4435c81117b2d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -60,7 +60,7 @@ module { } // Driver method to call and verify vector kernels. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f64 @@ -86,20 +86,24 @@ module { tensor<5x5xf64, #DCSR>) -> tensor<5x5xf64, #DCSR> - // CHECK: ( ( 0.1, 1.1, 0, 0, 0 ), - // CHECK-SAME: ( 0, 1.1, 2.2, 0, 0 ), - // CHECK-SAME: ( 0, 0, 2.1, 3.3, 0 ), - // CHECK-SAME: ( 0, 0, 0, 3.1, 4.4 ), - // CHECK-SAME: ( 0, 0, 0, 0, 4.1 ) ) - %r = sparse_tensor.convert %1 : tensor<5x5xf64, #DCSR> to tensor<5x5xf64> - %v2 = vector.transfer_read %r[%c0, %c0], %f0 : tensor<5x5xf64>, vector<5x5xf64> - vector.print %v2 : vector<5x5xf64> + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 2, 2, 3, 3, 4, 4 + // CHECK-NEXT: values : ( 0.1, 1.1, 1.1, 2.2, 2.1, 3.3, 3.1, 4.4, 4.1 + // CHECK-NEXT: ---- + // + sparse_tensor.print %1 : tensor<5x5xf64, #DCSR> // Release the resources. 
bufferization.dealloc_tensor %sl: tensor<5x5xf64, #DCSR> bufferization.dealloc_tensor %sr: tensor<5x5xf64, #DCSR> bufferization.dealloc_tensor %1: tensor<5x5xf64, #DCSR> - bufferization.dealloc_tensor %r : tensor<5x5xf64> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir index 08e75dfa2c02ca..c09374918b7d6a 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -79,7 +79,7 @@ module { } // Driver method to call and verify sign kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %du = arith.constant 0.0 : f64 @@ -110,11 +110,16 @@ module { // // Verify the results. // - // CHECK: ( -1, 1, -1, 1, 1, -1, nan, -nan, 1, -1, -0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 12 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 12 + // CHECK-NEXT: crd[0] : ( 0, 3, 5, 11, 13, 17, 18, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( -1, 1, -1, 1, 1, -1, nan, -nan, 1, -1, -0, 0 + // CHECK-NEXT: ---- // - %1 = sparse_tensor.values %0 : tensor to memref - %2 = vector.transfer_read %1[%c0], %du: memref, vector<13xf64> - vector.print %2 : vector<13xf64> + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir index e0111f692601f0..7b3f9a2ce0e012 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -35,19 +35,19 @@ !Filename = !llvm.ptr #SortedCOO = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) }> #SortedCOOPermuted = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton), + map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton(soa)), }> #SortedCOO3D = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique), d2 : singleton) + map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique, soa), d2 : singleton(soa)) }> #SortedCOO3DPermuted = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d2 : compressed(nonunique), d0 : singleton(nonunique), d1 : singleton) + map = (d0, d1, d2) -> (d2 : compressed(nonunique), d0 : singleton(nonunique, soa), d1 : singleton(soa)) }> @@ -82,29 +82,7 @@ module { return %0 : tensor } - func.func @dumpi(%arg0: memref) { - %c0 = arith.constant 0 : index - %v = vector.transfer_read %arg0[%c0], %c0: memref, 
vector<20xindex> - vector.print %v : vector<20xindex> - return - } - - func.func @dumpsi(%arg0: memref>) { - %c0 = arith.constant 0 : index - %v = vector.transfer_read %arg0[%c0], %c0: memref>, vector<20xindex> - vector.print %v : vector<20xindex> - return - } - - func.func @dumpf(%arg0: memref) { - %c0 = arith.constant 0 : index - %nan = arith.constant 0x0 : f64 - %v = vector.transfer_read %arg0[%c0], %nan: memref, vector<20xf64> - vector.print %v : vector<20xf64> - return - } - - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -125,130 +103,88 @@ module { %4 = sparse_tensor.convert %m : tensor<5x4xf64> to tensor // - // CHECK: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255, 0, 0, 0 ) - // CHECK-NEXT: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 4, 256 ) + // CHECK-NEXT: lvl = ( 4, 256 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + // CHECK-NEXT: crd[1] : ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 + // CHECK-NEXT: values : ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 + // CHECK-NEXT: ---- // - %p0 = sparse_tensor.positions %0 { level = 0 : index } - : tensor to memref - %i00 = sparse_tensor.coordinates %0 { level = 0 : index } - : tensor to memref> - %i01 = sparse_tensor.coordinates %0 { level = 1 : index } - : tensor to memref> - %v0 = sparse_tensor.values %0 - : tensor to memref - call @dumpi(%p0) : (memref) -> () - call @dumpsi(%i00) : (memref>) -> () - call @dumpsi(%i01) : (memref>) -> () - call @dumpf(%v0) : (memref) -> () + sparse_tensor.print %0 : tensor // - // CHECK-NEXT: 
( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3, 0, 0, 0 ) - // CHECK-NEXT: ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 4, 256 ) + // CHECK-NEXT: lvl = ( 256, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255 + // CHECK-NEXT: crd[1] : ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3 + // CHECK-NEXT: values : ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17 + // CHECK-NEXT: ---- // - %p1 = sparse_tensor.positions %1 { level = 0 : index } - : tensor to memref - %i10 = sparse_tensor.coordinates %1 { level = 0 : index } - : tensor to memref> - %i11 = sparse_tensor.coordinates %1 { level = 1 : index } - : tensor to memref> - %v1 = sparse_tensor.values %1 - : tensor to memref - call @dumpi(%p1) : (memref) -> () - call @dumpsi(%i10) : (memref>) -> () - call @dumpsi(%i11) : (memref>) -> () - call @dumpf(%v1) : (memref) -> () + sparse_tensor.print %1 : tensor // - // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) - // CHECK-NEXT: ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 + // CHECK-NEXT: crd[1] : ( 0, 0, 1, 1, 2, 2, 2, 2, 
0, 0, 0, 1, 1, 1, 1, 2, 2 + // CHECK-NEXT: crd[2] : ( 2, 3, 1, 2, 0, 1, 2, 3, 0, 2, 3, 0, 1, 2, 3, 1, 2 + // CHECK-NEXT: values : ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18 + // CHECK-NEXT: ---- // - %p2 = sparse_tensor.positions %2 { level = 0 : index } - : tensor to memref - %i20 = sparse_tensor.coordinates %2 { level = 0 : index } - : tensor to memref> - %i21 = sparse_tensor.coordinates %2 { level = 1 : index } - : tensor to memref> - %i22 = sparse_tensor.coordinates %2 { level = 2 : index } - : tensor to memref> - %v2 = sparse_tensor.values %2 - : tensor to memref - call @dumpi(%p2) : (memref) -> () - call @dumpsi(%i20) : (memref>) -> () - call @dumpsi(%i21) : (memref>) -> () - call @dumpsi(%i21) : (memref>) -> () - call @dumpf(%v2) : (memref) -> () + sparse_tensor.print %2 : tensor // - // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 4, 2, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1 + // CHECK-NEXT: crd[2] : ( 2, 0, 1, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 0, 1 + // CHECK-NEXT: values : ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75 + // CHECK-NEXT: ---- // - %p3 = sparse_tensor.positions %3 { level = 0 : index } - : tensor to memref - %i30 = sparse_tensor.coordinates %3 { level = 0 : index } - : tensor to memref> - %i31 = sparse_tensor.coordinates %3 { level = 1 : index } - : tensor to 
memref> - %i32 = sparse_tensor.coordinates %3 { level = 2 : index } - : tensor to memref> - %v3 = sparse_tensor.values %3 - : tensor to memref - call @dumpi(%p3) : (memref) -> () - call @dumpsi(%i30) : (memref>) -> () - call @dumpsi(%i31) : (memref>) -> () - call @dumpsi(%i31) : (memref>) -> () - call @dumpf(%v3) : (memref) -> () + sparse_tensor.print %3 : tensor // - // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- // - %p4 = sparse_tensor.positions %4 { level = 0 : index } - : tensor to memref - %i40 = sparse_tensor.coordinates %4 { level = 0 : index } - : tensor to memref> - %i41 = sparse_tensor.coordinates %4 { level = 1 : index } - : tensor to memref> - %v4 = sparse_tensor.values %4 - : tensor to memref - call @dumpi(%p4) : (memref) -> () - call @dumpsi(%i40) : (memref>) -> () - call @dumpsi(%i41) : (memref>) -> () - call @dumpf(%v4) : (memref) -> () + sparse_tensor.print %4 : tensor // And last but not least, an actual operation applied to COO. // Note that this performs the operation "in place". 
%5 = call @sparse_scale(%4) : (tensor) -> tensor // - // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 12, 10, 8, 6, 4, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 12, 10, 8, 6, 4, 22 + // CHECK-NEXT: ---- // - %p5 = sparse_tensor.positions %5 { level = 0 : index } - : tensor to memref - %i50 = sparse_tensor.coordinates %5 { level = 0 : index } - : tensor to memref> - %i51 = sparse_tensor.coordinates %5 { level = 1 : index } - : tensor to memref> - %v5 = sparse_tensor.values %5 - : tensor to memref - call @dumpi(%p5) : (memref) -> () - call @dumpsi(%i50) : (memref>) -> () - call @dumpsi(%i51) : (memref>) -> () - call @dumpf(%v5) : (memref) -> () + sparse_tensor.print %5 : tensor // Release the resources. 
bufferization.dealloc_tensor %0 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir index 573b1a2aac2598..ca8bcd7744c8f4 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -76,7 +76,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { %i0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir index 8ca95f2139e49a..2ee189de7906ca 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ 
-72,7 +72,7 @@ module { // are typically not concerned with such details, but the test ensures // everything is working "under the hood". // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %d0 = arith.constant 0.0 : f64 @@ -107,166 +107,103 @@ module { // // Inspect storage scheme of Dense. // - // CHECK: ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, - // CHECK-SAME: 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, - // CHECK-SAME: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 80 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: values : ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0 + // CHECK-NEXT: ---- // - %5 = sparse_tensor.values %0 : tensor<10x8xf64, #Dense> to memref - %6 = vector.transfer_read %5[%c0], %d0: memref, vector<80xf64> - vector.print %6 : vector<80xf64> + sparse_tensor.print %0 : tensor<10x8xf64, #Dense> // // Inspect storage scheme of CSR. 
// - // positions(1) - // indices(1) - // values // - // CHECK: ( 0, 3, 3, 4, 5, 6, 9, 12, 16, 16, 17 ) - // CHECK: ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 ) - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 3, 4, 5, 6, 9, 12, 16, 16, 17 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 + // CHECK-NEXT: ---- // - %7 = sparse_tensor.positions %1 { level = 1 : index } : tensor<10x8xf64, #CSR> to memref - %8 = vector.transfer_read %7[%c0], %c0: memref, vector<11xindex> - vector.print %8 : vector<11xindex> - %9 = sparse_tensor.coordinates %1 { level = 1 : index } : tensor<10x8xf64, #CSR> to memref - %10 = vector.transfer_read %9[%c0], %c0: memref, vector<17xindex> - vector.print %10 : vector<17xindex> - %11 = sparse_tensor.values %1 : tensor<10x8xf64, #CSR> to memref - %12 = vector.transfer_read %11[%c0], %d0: memref, vector<17xf64> - vector.print %12 : vector<17xf64> + sparse_tensor.print %1 : tensor<10x8xf64, #CSR> // // Inspect storage scheme of DCSR. 
// - // positions(0) - // indices(0) - // positions(1) - // indices(1) - // values + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 8 + // CHECK-NEXT: crd[0] : ( 0, 2, 3, 4, 5, 6, 7, 9 + // CHECK-NEXT: pos[1] : ( 0, 3, 4, 5, 6, 9, 12, 16, 17 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 + // CHECK-NEXT: ---- // - // CHECK: ( 0, 8 ) - // CHECK: ( 0, 2, 3, 4, 5, 6, 7, 9 ) - // CHECK: ( 0, 3, 4, 5, 6, 9, 12, 16, 17 ) - // CHECK: ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 ) - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ) - // - %13 = sparse_tensor.positions %2 { level = 0 : index } : tensor<10x8xf64, #DCSR> to memref - %14 = vector.transfer_read %13[%c0], %c0: memref, vector<2xindex> - vector.print %14 : vector<2xindex> - %15 = sparse_tensor.coordinates %2 { level = 0 : index } : tensor<10x8xf64, #DCSR> to memref - %16 = vector.transfer_read %15[%c0], %c0: memref, vector<8xindex> - vector.print %16 : vector<8xindex> - %17 = sparse_tensor.positions %2 { level = 1 : index } : tensor<10x8xf64, #DCSR> to memref - %18 = vector.transfer_read %17[%c0], %c0: memref, vector<9xindex> - vector.print %18 : vector<9xindex> - %19 = sparse_tensor.coordinates %2 { level = 1 : index } : tensor<10x8xf64, #DCSR> to memref - %20 = vector.transfer_read %19[%c0], %c0: memref, vector<17xindex> - vector.print %20 : vector<17xindex> - %21 = sparse_tensor.values %2 : tensor<10x8xf64, #DCSR> to memref - %22 = vector.transfer_read %21[%c0], %d0: memref, vector<17xf64> - vector.print %22 : vector<17xf64> + sparse_tensor.print %2 : tensor<10x8xf64, #DCSR> // // Inspect storage scheme of CSC. 
// - // positions(1) - // indices(1) - // values - // - // CHECK: ( 0, 1, 3, 8, 9, 10, 10, 13, 17 ) - // CHECK: ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 ) - // CHECK: ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 3, 8, 9, 10, 10, 13, 17 + // CHECK-NEXT: crd[1] : ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 + // CHECK-NEXT: values : ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 + // CHECK-NEXT: ---- // - %23 = sparse_tensor.positions %3 { level = 1 : index } : tensor<10x8xf64, #CSC> to memref - %24 = vector.transfer_read %23[%c0], %c0: memref, vector<9xindex> - vector.print %24 : vector<9xindex> - %25 = sparse_tensor.coordinates %3 { level = 1 : index } : tensor<10x8xf64, #CSC> to memref - %26 = vector.transfer_read %25[%c0], %c0: memref, vector<17xindex> - vector.print %26 : vector<17xindex> - %27 = sparse_tensor.values %3 : tensor<10x8xf64, #CSC> to memref - %28 = vector.transfer_read %27[%c0], %d0: memref, vector<17xf64> - vector.print %28 : vector<17xf64> + sparse_tensor.print %3 : tensor<10x8xf64, #CSC> // // Inspect storage scheme of DCSC. 
// - // positions(0) - // indices(0) - // positions(1) - // indices(1) - // values - // - // CHECK: ( 0, 7 ) - // CHECK: ( 0, 1, 2, 3, 4, 6, 7 ) - // CHECK: ( 0, 1, 3, 8, 9, 10, 13, 17 ) - // CHECK: ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 ) - // CHECK: ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 7 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 6, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 3, 8, 9, 10, 13, 17 + // CHECK-NEXT: crd[1] : ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 + // CHECK-NEXT: values : ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 + // CHECK-NEXT: ---- // - %29 = sparse_tensor.positions %4 { level = 0 : index } : tensor<10x8xf64, #DCSC> to memref - %30 = vector.transfer_read %29[%c0], %c0: memref, vector<2xindex> - vector.print %30 : vector<2xindex> - %31 = sparse_tensor.coordinates %4 { level = 0 : index } : tensor<10x8xf64, #DCSC> to memref - %32 = vector.transfer_read %31[%c0], %c0: memref, vector<7xindex> - vector.print %32 : vector<7xindex> - %33 = sparse_tensor.positions %4 { level = 1 : index } : tensor<10x8xf64, #DCSC> to memref - %34 = vector.transfer_read %33[%c0], %c0: memref, vector<8xindex> - vector.print %34 : vector<8xindex> - %35 = sparse_tensor.coordinates %4 { level = 1 : index } : tensor<10x8xf64, #DCSC> to memref - %36 = vector.transfer_read %35[%c0], %c0: memref, vector<17xindex> - vector.print %36 : vector<17xindex> - %37 = sparse_tensor.values %4 : tensor<10x8xf64, #DCSC> to memref - %38 = vector.transfer_read %37[%c0], %d0: memref, vector<17xf64> - vector.print %38 : vector<17xf64> + sparse_tensor.print %4 : tensor<10x8xf64, #DCSC> // // Inspect storage scheme of BlockRow. 
// - // positions(0) - // indices(0) - // values + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 64 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 8 + // CHECK-NEXT: crd[0] : ( 0, 2, 3, 4, 5, 6, 7, 9 + // CHECK-NEXT: values : ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 17, 0 + // CHECK-NEXT: ---- // - // CHECK: ( 0, 8 ) - // CHECK: ( 0, 2, 3, 4, 5, 6, 7, 9 ) - // CHECK: ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, - // CHECK-SAME: 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, - // CHECK-SAME: 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 17, 0 ) - // - %39 = sparse_tensor.positions %x { level = 0 : index } : tensor<10x8xf64, #BlockRow> to memref - %40 = vector.transfer_read %39[%c0], %c0: memref, vector<2xindex> - vector.print %40 : vector<2xindex> - %41 = sparse_tensor.coordinates %x { level = 0 : index } : tensor<10x8xf64, #BlockRow> to memref - %42 = vector.transfer_read %41[%c0], %c0: memref, vector<8xindex> - vector.print %42 : vector<8xindex> - %43 = sparse_tensor.values %x : tensor<10x8xf64, #BlockRow> to memref - %44 = vector.transfer_read %43[%c0], %d0: memref, vector<64xf64> - vector.print %44 : vector<64xf64> + sparse_tensor.print %x : tensor<10x8xf64, #BlockRow> // // Inspect storage scheme of BlockCol. 
// - // positions(0) - // indices(0) - // values - // - // CHECK: ( 0, 7 ) - // CHECK: ( 0, 1, 2, 3, 4, 6, 7 ) - // CHECK: ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 13, 0, 0, 2, 0, 4, 0, - // CHECK-SAME: 0, 8, 10, 14, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, - // CHECK-SAME: 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 15, 0, 17, 3, 0, 0, 0, 0, 9, 12, 16, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 70 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 7 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 6, 7 + // CHECK-NEXT: values : ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 13, 0, 0, 2, 0, 4, 0, 0, 8, 10, 14, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 15, 0, 17, 3, 0, 0, 0, 0, 9, 12, 16, 0, 0 + // CHECK-NEXT: ---- // - %45 = sparse_tensor.positions %y { level = 0 : index } : tensor<10x8xf64, #BlockCol> to memref - %46 = vector.transfer_read %45[%c0], %c0: memref, vector<2xindex> - vector.print %46 : vector<2xindex> - %47 = sparse_tensor.coordinates %y { level = 0 : index } : tensor<10x8xf64, #BlockCol> to memref - %48 = vector.transfer_read %47[%c0], %c0: memref, vector<7xindex> - vector.print %48 : vector<7xindex> - %49 = sparse_tensor.values %y : tensor<10x8xf64, #BlockCol> to memref - %50 = vector.transfer_read %49[%c0], %d0: memref, vector<70xf64> - vector.print %50 : vector<70xf64> + sparse_tensor.print %y : tensor<10x8xf64, #BlockCol> // Release the resources. 
bufferization.dealloc_tensor %0 : tensor<10x8xf64, #Dense> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir index 5184083f665d56..2b2b8536fe39ed 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -78,7 +78,7 @@ func.func @conv_2d_nhwc_hwcf_dual_CDCC(%arg0: tensor, %arg1: } -func.func @entry() { +func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir index e6cbff231024ed..d1c58bfb6d59ef 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd 
--march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -75,7 +75,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir index ee00a19a412306..16a8b50ab08e5c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { // Setup input sparse matrix from compressed constant. 
%d = arith.constant dense <[ [ 1.1, 1.2, 0.0, 1.4 ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir index 5fdf636ef1230a..f95c163a57c164 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -75,7 +75,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { //%d0 = arith.constant 0.0 : complex %d0 = complex.constant [0.0 : f64, 0.0 : f64] : complex %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir index 6a34695229495d..30be587c8f6119 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -66,7 +66,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { // Setup input sparse matrix from compressed constant. 
%d = arith.constant dense <[ [ 1.1, 1.2, 0.0, 1.4 ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir index 336044d5660057..29bc744c992032 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -56,28 +56,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec_f64(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant -1.0 : f64 - %n = sparse_tensor.number_of_entries %arg0: tensor - vector.print %n : index - %0 = sparse_tensor.values %arg0 - : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<9xf64> - vector.print %1 : vector<9xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 - : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %3 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { // Setup sparse vector. %v1 = arith.constant sparse< [ [0], [3], [11], [17], [20], [21], [28], [29], [31] ], @@ -93,11 +73,16 @@ module { // // Verify the results (within some precision). 
// - // CHECK: 9 - // CHECK-NEXT: {{( -0.761[0-9]*, 0.761[0-9]*, 0.96[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 1 )}} - // CHECK-NEXT: {{( -0.761[0-9]*, 0, 0, 0.761[0-9]*, 0, 0, 0, 0, 0, 0, 0, 0.96[0-9]*, 0, 0, 0, 0, 0, 0.99[0-9]*, 0, 0, 0.99[0-9]*, 0.99[0-9]*, 0, 0, 0, 0, 0, 0, 0.99[0-9]*, 0.99[0-9]*, 0, 1 )}} + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ({{ -0.761[0-9]*, 0.761[0-9]*, 0.96[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 1}} + // CHECK-NEXT: ---- // - call @dump_vec_f64(%0) : (tensor) -> () + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir index d53b03025f5588..67155201c58442 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { } // Driver method to call and verify tensor multiplication kernel. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %default_val = arith.constant -1.0 : f64 @@ -103,30 +103,28 @@ module { %0 = call @tensor_mul(%sta, %stb) : (tensor, tensor) -> tensor - // Verify results // - // CHECK: 4 - // CHECK-NEXT: ( 2.4, 3.5, 2, 8 ) - // CHECK-NEXT: ( ( ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 2.4, 0, 3.5, 0, 0 ) ), - // CHECK-SAME: ( ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ) ), - // CHECK-SAME: ( ( 2, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 0, 0, 8, 0, 0 ) ) ) + // Verify results. // - %n = sparse_tensor.number_of_entries %0 : tensor - vector.print %n : index - %m1 = sparse_tensor.values %0 : tensor to memref - %v1 = vector.transfer_read %m1[%c0], %default_val: memref, vector<4xf64> - vector.print %v1 : vector<4xf64> - - // Print %0 in dense form. - %dt = sparse_tensor.convert %0 : tensor to tensor - %v2 = vector.transfer_read %dt[%c0, %c0, %c0], %default_val: tensor, vector<3x3x5xf64> - vector.print %v2 : vector<3x3x5xf64> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 3, 3, 5 ) + // CHECK-NEXT: lvl = ( 3, 3, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 1, 3 + // CHECK-NEXT: crd[1] : ( 2, 0, 2 + // CHECK-NEXT: pos[2] : ( 0, 2, 3, 4 + // CHECK-NEXT: crd[2] : ( 0, 2, 0, 2 + // CHECK-NEXT: values : ( 2.4, 3.5, 2, 8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %sta : tensor bufferization.dealloc_tensor %stb : tensor bufferization.dealloc_tensor %0 : tensor - bufferization.dealloc_tensor %dt : tensor return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir index 6ef6b393019a8e..356808ebee3f7c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { } // Driver method to call and verify tensor kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %d1 = arith.constant -1.0 : f64 @@ -90,22 +90,34 @@ module { // Call sparse vector kernels. %0 = call @tensor_scale(%st) : (tensor) -> tensor + // // Sanity check on stored values. 
// - // CHECK: 5 - // CHECK-NEXT: ( 1, 2, 3, 4, 5 ) - // CHECK-NEXT: 24 - // CHECK-NEXT: ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10 ) - %m1 = sparse_tensor.values %st : tensor to memref - %m2 = sparse_tensor.values %0 : tensor to memref - %n1 = sparse_tensor.number_of_entries %st : tensor - %n2 = sparse_tensor.number_of_entries %0 : tensor - %v1 = vector.transfer_read %m1[%c0], %d1: memref, vector<5xf64> - %v2 = vector.transfer_read %m2[%c0], %d1: memref, vector<24xf64> - vector.print %n1 : index - vector.print %v1 : vector<5xf64> - vector.print %n2 : index - vector.print %v2 : vector<24xf64> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 3, 4, 8 ) + // CHECK-NEXT: lvl = ( 3, 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 3, 2 + // CHECK-NEXT: pos[2] : ( 0, 1, 2, 5 + // CHECK-NEXT: crd[2] : ( 0, 7, 1, 2, 7 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 3, 4, 8 ) + // CHECK-NEXT: lvl = ( 3, 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 3, 2 + // CHECK-NEXT: values : ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10 + // CHECK-NEXT: ---- + // + sparse_tensor.print %st : tensor + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %st : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir index 185f6161493e04..549c2082fcb3ac 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -92,7 +92,7 @@ module { // // Main driver. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c4 = arith.constant 4 : index @@ -115,26 +115,29 @@ module { // // Verify result. 
// - // CHECK: ( 1.1, 0, 3.1 ) - // CHECK-NEXT: ( 1.2, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 3.3 ) - // CHECK-NEXT: ( 1.4, 0, 3.4 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 4, 3 ) + // CHECK-NEXT: lvl = ( 4, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 + // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 4, 3 ) + // CHECK-NEXT: lvl = ( 4, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 + // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 + // CHECK-NEXT: ---- // - // CHECK-NEXT: ( 1.1, 0, 3.1 ) - // CHECK-NEXT: ( 1.2, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 3.3 ) - // CHECK-NEXT: ( 1.4, 0, 3.4 ) - // - %x = sparse_tensor.convert %0 : tensor<4x3xf64, #DCSR> to tensor<4x3xf64> - scf.for %i = %c0 to %c4 step %c1 { - %v1 = vector.transfer_read %x[%i, %c0], %du: tensor<4x3xf64>, vector<3xf64> - vector.print %v1 : vector<3xf64> - } - %y = sparse_tensor.convert %1 : tensor<4x3xf64, #DCSR> to tensor<4x3xf64> - scf.for %i = %c0 to %c4 step %c1 { - %v2 = vector.transfer_read %y[%i, %c0], %du: tensor<4x3xf64>, vector<3xf64> - vector.print %v2 : vector<3xf64> - } + sparse_tensor.print %0 : tensor<4x3xf64, #DCSR> + sparse_tensor.print %1 : tensor<4x3xf64, #DCSR> // Release resources. 
bufferization.dealloc_tensor %a : tensor<3x4xf64, #DCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir index dba897334830ad..cc6f6a068746d0 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -31,7 +31,7 @@ // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %} #SortedCOO = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) }> module { @@ -52,7 +52,7 @@ module { return %1 : tensor<5x10xf32, #SortedCOO> } - func.func @entry() { + func.func @main() { %f0 = arith.constant 0.0 : f32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -79,17 +79,27 @@ module { // // Verify original and transposed sorted COO. 
// - // CHECK: ( 10, 20, 30, 40, 50, 11, 21, 31, 41, 51, 12, 22, 32, 42, 52, 13, 23, 33, 43, 53, 14, 24, 34, 44, 54, 15, 25, 35, 45, 55, 16, 26, 36, 46, 56, 17, 27, 37, 47, 57, 18, 28, 38, 48, 58, 19, 29, 39, 49, 59 ) - // CHECK-NEXT: ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 50 + // CHECK-NEXT: dim = ( 10, 5 ) + // CHECK-NEXT: lvl = ( 10, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 50 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 + // CHECK-NEXT: values : ( 10, 20, 30, 40, 50, 11, 21, 31, 41, 51, 12, 22, 32, 42, 52, 13, 23, 33, 43, 53, 14, 24, 34, 44, 54, 15, 25, 35, 45, 55, 16, 26, 36, 46, 56, 17, 27, 37, 47, 57, 18, 28, 38, 48, 58, 19, 29, 39, 49, 59 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 50 + // CHECK-NEXT: dim = ( 5, 10 ) + // CHECK-NEXT: lvl = ( 5, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 50 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: values : ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 + // CHECK-NEXT: ---- // - %va = sparse_tensor.values 
%SA - : tensor<10x5xf32, #SortedCOO> to memref - %vat = sparse_tensor.values %SAT - : tensor<5x10xf32, #SortedCOO> to memref - %v1 = vector.transfer_read %va[%c0], %f0 : memref, vector<50xf32> - %v2 = vector.transfer_read %vat[%c0], %f0 : memref, vector<50xf32> - vector.print %v1 : vector<50xf32> - vector.print %v2 : vector<50xf32> + sparse_tensor.print %SA : tensor<10x5xf32, #SortedCOO> + sparse_tensor.print %SAT : tensor<5x10xf32, #SortedCOO> // Release resources. bufferization.dealloc_tensor %SA : tensor<10x5xf32, #SortedCOO> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir index e03f99253b7845..3da1e35818cfa5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -204,54 +204,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec_f64(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<32xf64> - vector.print %1 : vector<32xf64> - // Dump the dense vector to verify structure is correct. 
- %dv = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %3 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dumps a sparse vector of type i32. - func.func @dump_vec_i32(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0 : i32 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<24xi32> - vector.print %1 : vector<24xi32> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xi32> - vector.print %3 : vector<32xi32> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<4x8xf64> - vector.print %3 : vector<4x8xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %cmu = arith.constant -99 : i32 %c0 = arith.constant 0 : index @@ -289,26 +243,66 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 ) - // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 ) - // CHECK-NEXT: ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 ) - // CHECK-NEXT: ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 ) - // CHECK-NEXT: ( 0, 6, 33, 68, 100, 126, 196, 232, 279, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 68, 0, 0, 100, 126, 0, 0, 0, 0, 0, 0, 196, 232, 0, 279 ) - // CHECK-NEXT: ( 3, 3, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 3, 3, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 3 ), ( 0, 0, 4, 0, 5, 0, 0, 6 ), ( 7, 0, 7, 7, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 99, 99, 99, 99, 5, 6, 99, 99, 99, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 99, 99, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 99 ), ( 0, 0, 99, 0, 5, 0, 0, 6 ), ( 99, 0, 99, 99, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 23 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 23 + // CHECK-NEXT: crd[0] : ( 1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 22, 23, 24, 25, 26, 27, 30 + // CHECK-NEXT: values : ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 32 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + // CHECK-NEXT: values : ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 0, 6, 33, 68, 100, 126, 196, 232, 279 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 8 ) + // CHECK-NEXT: lvl = ( 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 3, 3, 3, 4, 5, 6, 7, 7, 7 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 8 ) + // CHECK-NEXT: lvl = ( 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 99, 99, 99, 99, 5, 6, 99, 99, 99 + // CHECK-NEXT: ---- // CHECK-NEXT: ( 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 ) // - call @dump_vec_f64(%sv1) : (tensor) -> () - call @dump_vec_i32(%0) : (tensor) -> () - call @dump_vec_f64(%1) : (tensor) -> () - call @dump_vec_f64(%2) : (tensor) -> () - call @dump_mat(%3) : (tensor) -> () - call @dump_mat(%4) : (tensor) -> () + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %0 : tensor + sparse_tensor.print %1 : 
tensor + sparse_tensor.print %2 : tensor + sparse_tensor.print %3 : tensor + sparse_tensor.print %4 : tensor %v = vector.transfer_read %5[%c0], %cmu: tensor, vector<32xi32> vector.print %v : vector<32xi32> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir index d9ca2dca85342a..55332333164130 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -162,24 +162,8 @@ module { return %0 : tensor } - // Dumps a sparse vector. - func.func @dump(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %2 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %d1 = arith.constant 1.1 : f64 @@ -221,31 +205,69 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 ) - // CHECK-NEXT: ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 11, 0, 12, 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 17, 0, 0, 0, 0, 0, 0, 18, 19, 0, 20 ) - // CHECK-NEXT: ( 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 8, 0, 0, 10, 12, 0, 0, 0, 0, 0, 0, 14, 16, 0, 18 ) - // CHECK-NEXT: ( 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 8, 0, 0, 10, 12, 0, 0, 0, 0, 0, 0, 14, 16, 0, 18 ) - // CHECK-NEXT: ( 2, 11, 16, 13, 14, 6, 15, 8, 16, 10, 29, 32, 35, 38, 0, 0 ) - // CHECK-NEXT: ( 2, 11, 0, 16, 13, 0, 0, 0, 0, 0, 14, 6, 0, 0, 0, 0, 15, 8, 16, 0, 10, 29, 0, 0, 0, 0, 0, 0, 32, 35, 0, 38 ) - // CHECK-NEXT: ( 48, 204, 252, 304, 360, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 ) - // CHECK-NEXT: ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 10 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 10 + // CHECK-NEXT: crd[0] : ( 1, 3, 4, 10, 16, 18, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + 
// CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 14 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 14 + // CHECK-NEXT: crd[0] : ( 0, 1, 3, 4, 10, 11, 16, 17, 18, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 11, 16, 13, 14, 6, 15, 8, 16, 10, 29, 32, 35, 38 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 3, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 48, 204, 252, 304, 360 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: values : ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 + // CHECK-NEXT: ---- // CHECK-NEXT: 1169.1 // - - call @dump(%sv1) : (tensor) -> () - call @dump(%sv2) : (tensor) -> () - call @dump(%0) : (tensor) -> () - call @dump(%1) : (tensor) -> () - call @dump(%2) : (tensor) -> () - call @dump(%3) : (tensor) -> () - %m4 = sparse_tensor.values %4 : tensor to memref - %v4 = vector.load %m4[%c0]: memref, vector<32xf64> - vector.print %v4 : vector<32xf64> + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %sv2 : tensor + sparse_tensor.print %0 : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor + sparse_tensor.print %3 : tensor + sparse_tensor.print %4 : 
tensor %v5 = tensor.extract %5[] : tensor vector.print %v5 : f64 From fc837f7a2dbdfca472bd1275362052facfe331a0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Mar 2024 19:05:50 +0000 Subject: [PATCH 078/158] [gn build] Port a6a6fca7911f --- .../gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn index 00e1888da64d26..131308db2aa557 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn @@ -23,12 +23,13 @@ static_library("Instrumentation") { "InstrProfiling.cpp", "Instrumentation.cpp", "KCFI.cpp", - "PGOForceFunctionAttrs.cpp", "MemProfiler.cpp", "MemorySanitizer.cpp", + "PGOForceFunctionAttrs.cpp", "PGOInstrumentation.cpp", "PGOMemOPSizeOpt.cpp", "PoisonChecking.cpp", + "RemoveTrapsPass.cpp", "SanitizerBinaryMetadata.cpp", "SanitizerCoverage.cpp", "ThreadSanitizer.cpp", From 3e73a080fa23594c81ad1dc61a65a11c9c488c5b Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 5 Mar 2024 19:47:38 -0600 Subject: [PATCH 079/158] [X86] Add tests for folding `(icmp ult (add x,-C),2)` -> `(or (icmp eq X,C), (icmp eq X,C+1))`; NFC --- llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll | 786 +++++++++++++++++++ 1 file changed, 786 insertions(+) create mode 100644 llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll new file mode 100644 index 00000000000000..3578a49759bf56 --- /dev/null +++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll @@ -0,0 +1,786 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 +; RUN: llc < %s -mtriple=x86_64-- 
-mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 + +declare void @use.v4.i32(<4 x i32>) + +define <4 x i32> @eq_or_eq_ult_2(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291] +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2: +; SSE41: # %bb.0: +; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2: +; SSE2: # %bb.0: +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x 
i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_2_only_transform_sse2(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddd %xmm0, %xmm1 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $24, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 32 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; 
AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: callq use.v4.i32@PLT +; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295] +; AVX512-NEXT: addq $24, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 8 +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX1: # %bb.0: +; AVX1-NEXT: subq $24, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 32 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX1-NEXT: callq use.v4.i32@PLT +; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: addq $24, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX2: # %bb.0: +; AVX2-NEXT: subq $24, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 32 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX2-NEXT: callq use.v4.i32@PLT +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1] +; AVX2-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX2-NEXT: vpminud %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: addq $24, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_fail_multiuse: +; SSE41: # %bb.0: +; SSE41-NEXT: subq $24, %rsp +; SSE41-NEXT: .cfi_def_cfa_offset 32 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE41-NEXT: callq use.v4.i32@PLT +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [1,1,1,1] +; SSE41-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE41-NEXT: pminud 
%xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: addq $24, %rsp +; SSE41-NEXT: .cfi_def_cfa_offset 8 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_fail_multiuse: +; SSE2: # %bb.0: +; SSE2-NEXT: subq $24, %rsp +; SSE2-NEXT: .cfi_def_cfa_offset 32 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddd %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: callq use.v4.i32@PLT +; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: addq $24, %rsp +; SSE2-NEXT: .cfi_def_cfa_offset 8 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + call void @use.v4.i32(<4 x i32> %x_adj) + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_3_fail(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_3_fail: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_3_fail: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_3_fail: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_3_fail: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: 
pmovsxbd {{.*#+}} xmm1 = [2,2,2,2] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_3_fail: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddd %xmm0, %xmm1 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ugt_m3(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ugt_m3: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ugt_m3: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ugt_m3: +; AVX2: # %bb.0: +; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294] +; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ugt_m3: +; SSE41: # %bb.0: +; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294] +; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ugt_m3: +; SSE2: # %bb.0: +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpgtd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ugt <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ule_1(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ule_1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ule_1: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ule_1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ule_1: +; SSE41: # %bb.0: +; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ule_1: +; SSE2: # %bb.0: +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ule <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <2 x i64> @eq_or_eq_uge_m2_i64(<2 x i64> %x) { +; AVX512-LABEL: eq_or_eq_uge_m2_i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_uge_m2_i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775806,9223372036854775806] +; AVX1-NEXT: # xmm1 = mem[0,0] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_uge_m2_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775806,9223372036854775806] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_m2_i64: +; SSE41: # %bb.0: +; SSE41-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9223372034707292158,9223372034707292158] +; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_m2_i64: +; SSE2: # %bb.0: +; SSE2-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372034707292158,9223372034707292158] +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pshufd 
{{.*#+}} xmm2 = xmm1[0,0,2,2] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <2 x i64> %x, + %cmp = icmp uge <2 x i64> %x_adj, + %r = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %r +} + + +define <2 x i64> @eq_or_eq_uge_m2_i64_m1(<2 x i64> %x) { +; AVX512-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775806,9223372036854775806] +; AVX1-NEXT: # xmm2 = mem[0,0] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775806,9223372036854775806] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_m2_i64_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddq %xmm1, %xmm0 +; SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292158,9223372034707292158] +; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = 
xmm2[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE41-NEXT: pand %xmm3, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_m2_i64_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddq %xmm1, %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292158,9223372034707292158] +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE2-NEXT: pand %xmm3, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <2 x i64> %x, + %cmp = icmp uge <2 x i64> %x_adj, + %r = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %r +} + +define <4 x i32> @eq_or_eq_uge_2_fail_(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_uge_2_fail_: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_uge_2_fail_: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_uge_2_fail_: +; AVX2: # %bb.0: +; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_2_fail_: 
+; SSE41: # %bb.0: +; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [2,2,2,2] +; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_2_fail_: +; SSE2: # %bb.0: +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp uge <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + + +define <8 x i32> @eq_or_eq_ult_2_256(<8 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291] +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpminud %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967291,4294967291,4294967291,4294967291,4294967291,4294967291,4294967291,4294967291] +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 +; 
AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] +; SSE41-NEXT: paddd %xmm2, %xmm1 +; SSE41-NEXT: paddd %xmm2, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [1,1,1,1] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pminud %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE41-NEXT: pminud %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] +; SSE2-NEXT: paddd %xmm2, %xmm1 +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <8 x i32> %x, + %cmp = icmp ult <8 x i32> %x_adj, + %r = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %r +} + + +define <8 x i32> @eq_or_eq_ult_2_256_m1(<8 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; AVX512-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256_m1: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; 
AVX1-NEXT: vpminud %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256_m1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: paddd %xmm2, %xmm1 +; SSE41-NEXT: paddd %xmm2, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [1,1,1,1] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pminud %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE41-NEXT: pminud %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: paddd %xmm2, %xmm1 +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <8 x i32> %x, + %cmp = icmp ult <8 x i32> %x_adj, + %r = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %r +} + + +define <32 x i8> @eq_or_eq_ult_2_256_i8_m1(<32 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX1: # %bb.0: +; 
AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpminub %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256_i8_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: paddb %xmm2, %xmm1 +; SSE41-NEXT: paddb %xmm2, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pminub %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqb %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256_i8_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm1 +; SSE2-NEXT: paddb %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pminub %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE2-NEXT: pminub %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <32 x i8> %x, + %cmp = icmp ult <32 x i8> %x_adj, + %r = sext <32 x i1> %cmp to <32 x i8> + ret <32 x i8> %r +} + + +define <16 x i8> @eq_or_eq_ult_2_128_i8_m1(<16 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_128_i8_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpminub 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ult_2_128_i8_m1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_128_i8_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddb %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_128_i8_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: pminub %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <16 x i8> %x, + %cmp = icmp ult <16 x i8> %x_adj, + %r = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %r +} + + +define <16 x i8> @eq_or_eq_ult_2_128_i8(<16 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_128_i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ult_2_128_i8: +; AVX: # %bb.0: +; AVX-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_128_i8: +; SSE41: # %bb.0: +; SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_128_i8: +; SSE2: # 
%bb.0: +; SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: pminub %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <16 x i8> %x, + %cmp = icmp ult <16 x i8> %x_adj, + %r = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %r +} From 9f96db8e310f79ec450c9cf6e6311f576dfd1d51 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 5 Mar 2024 19:44:41 -0600 Subject: [PATCH 080/158] [X86] Fold `(icmp ult (add x,-C),2)` -> `(or (icmp eq X,C), (icmp eq X,C+1))` for Vectors This is undoing a middle-end transform which does the opposite. Since X86 doesn't have unsigned vector comparison instructions pre-AVX512, the simplified form gets worse codegen. Fixes #66479 Proofs: https://alive2.llvm.org/ce/z/UCz3wt Closes #84104 Closes #66479 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 63 ++++ llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll | 374 ++++++++----------- 2 files changed, 214 insertions(+), 223 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 94c4bbc4a09993..e1e6c22eb8cca5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53441,6 +53441,69 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget)) return R; + // In the middle end transforms: + // `(or (icmp eq X, C), (icmp eq X, C+1))` + // -> `(icmp ult (add x, -C), 2)` + // Likewise inverted cases with `ugt`. + // + // Since x86, pre avx512, doesn't have unsigned vector compares, this results + // in worse codegen. So, undo the middle-end transform and go back to `(or + // (icmp eq), (icmp eq))` form. + // Also skip AVX1 with ymm vectors, as the umin approach combines better than + // the xmm approach. 
+ // + // NB: We don't handle the similiar simplication of `(and (icmp ne), (icmp + // ne))` as it doesn't end up instruction positive. + // TODO: We might want to do this for avx512 as well if we `sext` the result. + if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() && + ISD::isUnsignedIntSetCC(CC) && LHS.getOpcode() == ISD::ADD && + !Subtarget.hasAVX512() && + (OpVT.getSizeInBits() <= 128 || !Subtarget.hasAVX() || + Subtarget.hasAVX2()) && + LHS.hasOneUse()) { + + APInt CmpC; + SDValue AddC = LHS.getOperand(1); + if (ISD::isConstantSplatVector(RHS.getNode(), CmpC) && + DAG.isConstantIntBuildVectorOrConstantInt(AddC)) { + // See which form we have depending on the constant/condition. + SDValue C0 = SDValue(); + SDValue C1 = SDValue(); + + // If we had `(add x, -1)` and can lower with `umin`, don't transform as + // we will end up generating an additional constant. Keeping in the + // current form has a slight latency cost, but it probably worth saving a + // constant. + if (ISD::isConstantSplatVectorAllOnes(AddC.getNode()) && + DAG.getTargetLoweringInfo().isOperationLegal(ISD::UMIN, OpVT)) { + // Pass + } + // Normal Cases + else if ((CC == ISD::SETULT && CmpC == 2) || + (CC == ISD::SETULE && CmpC == 1)) { + // These will constant fold. + C0 = DAG.getNegative(AddC, DL, OpVT); + C1 = DAG.getNode(ISD::SUB, DL, OpVT, C0, + DAG.getAllOnesConstant(DL, OpVT)); + } + // Inverted Cases + else if ((CC == ISD::SETUGT && (-CmpC) == 3) || + (CC == ISD::SETUGE && (-CmpC) == 2)) { + // These will constant fold. 
+ C0 = DAG.getNOT(DL, AddC, OpVT); + C1 = DAG.getNode(ISD::ADD, DL, OpVT, C0, + DAG.getAllOnesConstant(DL, OpVT)); + } + if (C0 && C1) { + SDValue NewLHS = + DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ); + SDValue NewRHS = + DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ); + return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS); + } + } + } + // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early // to avoid scalarization via legalization because v4i32 is not a legal type. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll index 3578a49759bf56..527995bc2139ec 100644 --- a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll +++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll @@ -18,35 +18,34 @@ define <4 x i32> @eq_or_eq_ult_2(<4 x i32> %x) { ; ; AVX1-LABEL: eq_or_eq_ult_2: ; AVX1: # %bb.0: -; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: eq_or_eq_ult_2: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291] -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5] +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_ult_2: ; SSE41: # %bb.0: -; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm0 -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] -; SSE41-NEXT: pminud %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [6,6,6,6] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ult_2: ; SSE2: # %bb.0: -; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650] -; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq %x_adj = add <4 x i32> %x, %cmp = icmp ult <4 x i32> %x_adj, @@ -91,11 +90,10 @@ define <4 x i32> @eq_or_eq_ult_2_only_transform_sse2(<4 x i32> %x) { ; ; SSE2-LABEL: eq_or_eq_ult_2_only_transform_sse2: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,2,2,2] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq %x_adj = add <4 x i32> %x, %cmp = icmp ult <4 x i32> %x_adj, @@ -247,34 +245,27 @@ define <4 x i32> @eq_or_eq_ugt_m3(<4 x i32> %x) { ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq ; -; AVX1-LABEL: eq_or_eq_ugt_m3: -; AVX1: # %bb.0: -; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: eq_or_eq_ugt_m3: -; AVX2: # %bb.0: -; AVX2-NEXT: 
vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294] -; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: eq_or_eq_ugt_m3: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_ugt_m3: ; SSE41: # %bb.0: -; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294] -; SSE41-NEXT: pmaxud %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [9,12,9,9] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ugt_m3: ; SSE2: # %bb.0: -; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9,12,9,9] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq %x_adj = add <4 x i32> %x, %cmp = icmp ugt <4 x i32> %x_adj, @@ -291,36 +282,27 @@ define <4 x i32> @eq_or_eq_ule_1(<4 x i32> %x) { ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq ; -; AVX1-LABEL: eq_or_eq_ule_1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: eq_or_eq_ule_1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; AVX2-NEXT: 
vpminud %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: eq_or_eq_ule_1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_ule_1: ; SSE41: # %bb.0: -; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] -; SSE41-NEXT: pminud %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [0,4294967295,4294967294,4294967293] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ule_1: ; SSE2: # %bb.0: -; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,4294967294,4294967293] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq %x_adj = add <4 x i32> %x, %cmp = icmp ule <4 x i32> %x_adj, @@ -337,57 +319,31 @@ define <2 x i64> @eq_or_eq_uge_m2_i64(<2 x i64> %x) { ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq ; -; AVX1-LABEL: eq_or_eq_uge_m2_i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775806,9223372036854775806] -; AVX1-NEXT: # xmm1 = mem[0,0] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: eq_or_eq_uge_m2_i64: -; AVX2: # %bb.0: 
-; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775806,9223372036854775806] -; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: eq_or_eq_uge_m2_i64: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_uge_m2_i64: ; SSE41: # %bb.0: -; SSE41-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9223372034707292158,9223372034707292158] -; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [18446744073709551613,18446744073709551612] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_uge_m2_i64: ; SSE2: # %bb.0: -; SSE2-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372034707292158,9223372034707292158] -; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551613,18446744073709551612] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pshufd 
{{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq %x_adj = add <2 x i64> %x, %cmp = icmp uge <2 x i64> %x_adj, @@ -405,57 +361,35 @@ define <2 x i64> @eq_or_eq_uge_m2_i64_m1(<2 x i64> %x) { ; AVX512-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq ; -; AVX1-LABEL: eq_or_eq_uge_m2_i64_m1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775806,9223372036854775806] -; AVX1-NEXT: # xmm2 = mem[0,0] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: eq_or_eq_uge_m2_i64_m1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775806,9223372036854775806] -; AVX2-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_uge_m2_i64_m1: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: paddq %xmm1, %xmm0 -; SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = 
[9223372034707292158,9223372034707292158] -; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 -; SSE41-NEXT: pand %xmm3, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_uge_m2_i64_m1: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: paddq %xmm1, %xmm0 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292158,9223372034707292158] -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 -; SSE2-NEXT: pand %xmm3, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq %x_adj = add <2 x i64> %x, %cmp = icmp uge <2 x i64> %x_adj, @@ -536,40 +470,37 @@ define <8 x i32> @eq_or_eq_ult_2_256(<8 x i32> %x) { ; ; AVX2-LABEL: eq_or_eq_ult_2_256: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967291,4294967291,4294967291,4294967291,4294967291,4294967291,4294967291,4294967291] -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] -; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: 
vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5] +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: eq_or_eq_ult_2_256: ; SSE41: # %bb.0: -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] -; SSE41-NEXT: paddd %xmm2, %xmm1 -; SSE41-NEXT: paddd %xmm2, %xmm0 -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [1,1,1,1] +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [6,6,6,6] ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pminud %xmm2, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 -; SSE41-NEXT: pminud %xmm1, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [5,5,5,5] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ult_2_256: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: pxor %xmm4, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483650,2147483650,2147483650,2147483650] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [6,6,6,6] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,5,5,5] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: retq %x_adj = add <8 x i32> %x, %cmp = 
icmp ult <8 x i32> %x_adj, @@ -612,31 +543,28 @@ define <8 x i32> @eq_or_eq_ult_2_256_m1(<8 x i32> %x) { ; ; SSE41-LABEL: eq_or_eq_ult_2_256_m1: ; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: paddd %xmm2, %xmm1 -; SSE41-NEXT: paddd %xmm2, %xmm0 -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [1,1,1,1] +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [2,2,2,2] ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pminud %xmm2, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 -; SSE41-NEXT: pminud %xmm1, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ult_2_256_m1: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: pxor %xmm4, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483650,2147483650,2147483650,2147483650] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: retq %x_adj = add <8 x i32> %x, %cmp = icmp ult <8 x i32> %x_adj, @@ -678,28 +606,28 @@ define <32 x i8> @eq_or_eq_ult_2_256_i8_m1(<32 x i8> %x) { ; ; SSE41-LABEL: eq_or_eq_ult_2_256_i8_m1: ; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: paddb %xmm2, %xmm1 
-; SSE41-NEXT: paddb %xmm2, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pminub %xmm2, %xmm3 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 -; SSE41-NEXT: pminub %xmm1, %xmm2 -; SSE41-NEXT: pcmpeqb %xmm2, %xmm1 +; SSE41-NEXT: pcmpeqb %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pcmpeqb %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqb %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqb %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ult_2_256_i8_m1: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: paddb %xmm2, %xmm1 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pminub %xmm2, %xmm3 -; SSE2-NEXT: pcmpeqb %xmm3, %xmm0 -; SSE2-NEXT: pminub %xmm1, %xmm2 -; SSE2-NEXT: pcmpeqb %xmm2, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: pcmpeqb %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqb %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: retq %x_adj = add <32 x i8> %x, %cmp = icmp ult <32 x i8> %x_adj, @@ -759,25 +687,25 @@ define <16 x i8> @eq_or_eq_ult_2_128_i8(<16 x i8> %x) { ; ; AVX-LABEL: eq_or_eq_ult_2_128_i8: ; AVX: # %bb.0: -; AVX-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; SSE41-LABEL: 
eq_or_eq_ult_2_128_i8: ; SSE41: # %bb.0: -; SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE41-NEXT: pminub %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232] +; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_eq_ult_2_128_i8: ; SSE2: # %bb.0: -; SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE2-NEXT: pminub %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232] +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq %x_adj = add <16 x i8> %x, %cmp = icmp ult <16 x i8> %x_adj, From 00f412168cf6aee234615a11d1424fc58221eb78 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Thu, 7 Mar 2024 11:15:16 -0800 Subject: [PATCH 081/158] [ORC][JITLink] Add Intel VTune support to JITLink (#83957) [ORC] Re-land https://github.com/llvm/llvm-project/pull/81826 This patch adds two plugins: VTuneSupportPlugin.cpp and JITLoaderVTune.cpp. The testing is done in a manner similar to llvm-jitlistener. Currently, we only support the old version of Intel VTune API. 
--- .../Orc/Debugging/VTuneSupportPlugin.h | 66 ++++++ .../Orc/Shared/VTuneSharedStructs.h | 102 ++++++++ .../Orc/TargetProcess/JITLoaderVTune.h | 31 +++ .../Orc/Debugging/CMakeLists.txt | 1 + .../Orc/Debugging/VTuneSupportPlugin.cpp | 185 +++++++++++++++ .../Orc/TargetProcess/CMakeLists.txt | 9 + .../Orc/TargetProcess/JITLoaderVTune.cpp | 224 ++++++++++++++++++ .../JITLink/x86-64/ELF_vtune.s | 52 ++++ .../JITLink/x86-64/lit.local.cfg | 3 + llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 19 +- 10 files changed, 691 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h create mode 100644 llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp create mode 100644 llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp create mode 100644 llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h new file mode 100644 index 00000000000000..9deb38a1a71fb1 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h @@ -0,0 +1,66 @@ +//===--- VTuneSupportPlugin.h -- Support for VTune profiler ---*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with VIntel Tune's Amplifier JIT API. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGING_VTUNESUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_DEBUGGING_VTUNESUPPORT_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h" + +namespace llvm { + +namespace orc { + +class VTuneSupportPlugin : public ObjectLinkingLayer::Plugin { +public: + VTuneSupportPlugin(ExecutorProcessControl &EPC, ExecutorAddr RegisterImplAddr, + ExecutorAddr UnregisterImplAddr, bool EmitDebugInfo) + : EPC(EPC), RegisterVTuneImplAddr(RegisterImplAddr), + UnregisterVTuneImplAddr(UnregisterImplAddr), + EmitDebugInfo(EmitDebugInfo) {} + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) override; + + Error notifyEmitted(MaterializationResponsibility &MR) override; + Error notifyFailed(MaterializationResponsibility &MR) override; + Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override; + void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) override; + + static Expected> + Create(ExecutorProcessControl &EPC, JITDylib &JD, bool EmitDebugInfo, + bool TestMode = false); + +private: + ExecutorProcessControl &EPC; + ExecutorAddr RegisterVTuneImplAddr; + ExecutorAddr UnregisterVTuneImplAddr; + std::mutex PluginMutex; + uint64_t NextMethodID = 0; + DenseMap> + PendingMethodIDs; + DenseMap>> + LoadedMethodIDs; + bool EmitDebugInfo; +}; + +} // end namespace orc + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h new file mode 100644 index 00000000000000..667d3446faff74 --- 
/dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h @@ -0,0 +1,102 @@ +//===-------------------- VTuneSharedStructs.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Structs and serialization to share VTune-related information +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_VTUNESHAREDSTRUCTS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_VTUNESHAREDSTRUCTS_H + +namespace llvm { +namespace orc { + +using VTuneLineTable = std::vector>; + +// SI = String Index, 1-indexed into the VTuneMethodBatch::Strings table. +// SI == 0 means replace with nullptr. + +// MI = Method Index, 1-indexed into the VTuneMethodBatch::Methods table. +// MI == 0 means this is a parent method and was not inlined. 
+ +struct VTuneMethodInfo { + VTuneLineTable LineTable; + ExecutorAddr LoadAddr; + uint64_t LoadSize; + uint64_t MethodID; + uint32_t NameSI; + uint32_t ClassFileSI; + uint32_t SourceFileSI; + uint32_t ParentMI; +}; + +using VTuneMethodTable = std::vector; +using VTuneStringTable = std::vector; + +struct VTuneMethodBatch { + VTuneMethodTable Methods; + VTuneStringTable Strings; +}; + +using VTuneUnloadedMethodIDs = SmallVector>; + +namespace shared { + +using SPSVTuneLineTable = SPSSequence>; +using SPSVTuneMethodInfo = + SPSTuple; +using SPSVTuneMethodTable = SPSSequence; +using SPSVTuneStringTable = SPSSequence; +using SPSVTuneMethodBatch = SPSTuple; +using SPSVTuneUnloadedMethodIDs = SPSSequence>; + +template <> class SPSSerializationTraits { +public: + static size_t size(const VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::size( + MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } + + static bool deserialize(SPSInputBuffer &IB, VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::deserialize( + IB, MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } + + static bool serialize(SPSOutputBuffer &OB, const VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::serialize( + OB, MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } +}; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::size(MB.Methods, MB.Strings); + } + + static bool deserialize(SPSInputBuffer &IB, VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::deserialize(IB, MB.Methods, + MB.Strings); + } + + static bool serialize(SPSOutputBuffer &OB, const VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::serialize(OB, MB.Methods, + MB.Strings); + } +}; + +} // 
end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h new file mode 100644 index 00000000000000..afb7df592faf27 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h @@ -0,0 +1,31 @@ + +//===------ JITLoaderVTune.h --- Register profiler objects ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the perf JIT interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H + +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size); + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H + + diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt index 5bf23a7ec0bc89..ed52692662a8a3 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMOrcDebugging DebuggerSupportPlugin.cpp LLJITUtilsCBindings.cpp 
PerfSupportPlugin.cpp + VTuneSupportPlugin.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc/Debugging/ diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp new file mode 100644 index 00000000000000..30a9728c8c20e3 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp @@ -0,0 +1,185 @@ +//===--- VTuneSupportPlugin.cpp -- Support for VTune profiler --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with VIntel Tune's Amplfiier JIT API. +// +//===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h" + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::jitlink; + +static constexpr StringRef RegisterVTuneImplName = "llvm_orc_registerVTuneImpl"; +static constexpr StringRef UnregisterVTuneImplName = + "llvm_orc_unregisterVTuneImpl"; +static constexpr StringRef RegisterTestVTuneImplName = + "llvm_orc_test_registerVTuneImpl"; + +static VTuneMethodBatch getMethodBatch(LinkGraph &G, bool EmitDebugInfo) { + VTuneMethodBatch Batch; + std::unique_ptr DC; + StringMap> DCBacking; + if (EmitDebugInfo) { + auto EDC = createDWARFContext(G); + if (!EDC) { + EmitDebugInfo = false; + } else { + DC = std::move(EDC->first); + DCBacking = std::move(EDC->second); + } + } + + auto GetStringIdx = [Deduplicator = StringMap(), + &Batch](StringRef S) mutable { + auto I = Deduplicator.find(S); + if (I != Deduplicator.end()) + return 
I->second; + + Batch.Strings.push_back(S.str()); + return Deduplicator[S] = Batch.Strings.size(); + }; + for (auto Sym : G.defined_symbols()) { + if (!Sym->isCallable()) + continue; + + Batch.Methods.push_back(VTuneMethodInfo()); + auto &Method = Batch.Methods.back(); + Method.MethodID = 0; + Method.ParentMI = 0; + Method.LoadAddr = Sym->getAddress(); + Method.LoadSize = Sym->getSize(); + Method.NameSI = GetStringIdx(Sym->getName()); + Method.ClassFileSI = 0; + Method.SourceFileSI = 0; + + if (!EmitDebugInfo) + continue; + + auto &Section = Sym->getBlock().getSection(); + auto Addr = Sym->getAddress(); + auto SAddr = + object::SectionedAddress{Addr.getValue(), Section.getOrdinal()}; + DILineInfoTable LinesInfo = DC->getLineInfoForAddressRange( + SAddr, Sym->getSize(), + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); + Method.SourceFileSI = Batch.Strings.size(); + Batch.Strings.push_back(DC->getLineInfoForAddress(SAddr).FileName); + for (auto &LInfo : LinesInfo) { + Method.LineTable.push_back( + std::pair{/*unsigned*/ Sym->getOffset(), + /*DILineInfo*/ LInfo.second.Line}); + } + } + return Batch; +} + +void VTuneSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR, + LinkGraph &G, + PassConfiguration &Config) { + Config.PostFixupPasses.push_back([this, MR = &MR](LinkGraph &G) { + // the object file is generated but not linked yet + auto Batch = getMethodBatch(G, EmitDebugInfo); + if (Batch.Methods.empty()) { + return Error::success(); + } + { + std::lock_guard Lock(PluginMutex); + uint64_t Allocated = Batch.Methods.size(); + uint64_t Start = NextMethodID; + NextMethodID += Allocated; + for (size_t i = Start; i < NextMethodID; ++i) { + Batch.Methods[i - Start].MethodID = i; + } + this->PendingMethodIDs[MR] = {Start, Allocated}; + } + G.allocActions().push_back( + {cantFail(shared::WrapperFunctionCall::Create< + shared::SPSArgList>( + RegisterVTuneImplAddr, Batch)), + {}}); + return Error::success(); + }); +} + +Error 
VTuneSupportPlugin::notifyEmitted(MaterializationResponsibility &MR) { + if (auto Err = MR.withResourceKeyDo([this, MR = &MR](ResourceKey K) { + std::lock_guard Lock(PluginMutex); + auto I = PendingMethodIDs.find(MR); + if (I == PendingMethodIDs.end()) + return; + + LoadedMethodIDs[K].push_back(I->second); + PendingMethodIDs.erase(I); + })) { + return Err; + } + return Error::success(); +} + +Error VTuneSupportPlugin::notifyFailed(MaterializationResponsibility &MR) { + std::lock_guard Lock(PluginMutex); + PendingMethodIDs.erase(&MR); + return Error::success(); +} + +Error VTuneSupportPlugin::notifyRemovingResources(JITDylib &JD, ResourceKey K) { + // Unregistration not required if not provided + if (!UnregisterVTuneImplAddr) { + return Error::success(); + } + VTuneUnloadedMethodIDs UnloadedIDs; + { + std::lock_guard Lock(PluginMutex); + auto I = LoadedMethodIDs.find(K); + if (I == LoadedMethodIDs.end()) + return Error::success(); + + UnloadedIDs = std::move(I->second); + LoadedMethodIDs.erase(I); + } + if (auto Err = EPC.callSPSWrapper( + UnregisterVTuneImplAddr, UnloadedIDs)) + return Err; + + return Error::success(); +} + +void VTuneSupportPlugin::notifyTransferringResources(JITDylib &JD, + ResourceKey DstKey, + ResourceKey SrcKey) { + std::lock_guard Lock(PluginMutex); + auto I = LoadedMethodIDs.find(SrcKey); + if (I == LoadedMethodIDs.end()) + return; + + auto &Dest = LoadedMethodIDs[DstKey]; + Dest.insert(Dest.end(), I->second.begin(), I->second.end()); + LoadedMethodIDs.erase(SrcKey); +} + +Expected> +VTuneSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD, + bool EmitDebugInfo, bool TestMode) { + auto &ES = EPC.getExecutionSession(); + auto RegisterImplName = + ES.intern(TestMode ? 
RegisterTestVTuneImplName : RegisterVTuneImplName); + auto UnregisterImplName = ES.intern(UnregisterVTuneImplName); + SymbolLookupSet SLS{RegisterImplName, UnregisterImplName}; + auto Res = ES.lookup(makeJITDylibSearchOrder({&JD}), std::move(SLS)); + if (!Res) + return Res.takeError(); + ExecutorAddr RegisterImplAddr( + Res->find(RegisterImplName)->second.getAddress()); + ExecutorAddr UnregisterImplAddr( + Res->find(UnregisterImplName)->second.getAddress()); + return std::make_unique( + EPC, RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo); +} diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt index f2005dc1775e3c..3d1dfe758c79dd 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt @@ -2,10 +2,18 @@ if( CMAKE_HOST_UNIX AND HAVE_LIBRT ) set(rt_lib rt) endif() +set(intel_jit_profiling ) +if( LLVM_USE_INTEL_JITEVENTS ) + set(intel_jit_profiling IntelJITProfiling) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../IntelJITProfiling) + include_directories(${PROJECT_BINARY_DIR}/ittapi/include/ ) +endif() + add_llvm_component_library(LLVMOrcTargetProcess ExecutorSharedMemoryMapperService.cpp JITLoaderGDB.cpp JITLoaderPerf.cpp + JITLoaderVTune.cpp OrcRTBootstrap.cpp RegisterEHFrames.cpp SimpleExecutorDylibManager.cpp @@ -21,6 +29,7 @@ add_llvm_component_library(LLVMOrcTargetProcess ${rt_lib} LINK_COMPONENTS + ${intel_jit_profiling} OrcShared Support TargetParser diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp new file mode 100644 index 00000000000000..d346214d3ae291 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp @@ -0,0 +1,224 @@ +//===------- JITLoaderVTune.cpp - Register profiler objects -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the VTune JIT interface. +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h" +#include "llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h" +#include + +#if LLVM_USE_INTEL_JITEVENTS +#include "IntelJITEventsWrapper.h" +#include "ittnotify.h" + +using namespace llvm; +using namespace llvm::orc; + +namespace { +class JITEventWrapper { +public: + static std::unique_ptr Wrapper; +}; +std::unique_ptr JITEventWrapper::Wrapper; +} // namespace + +static Error registerJITLoaderVTuneRegisterImpl(const VTuneMethodBatch &MB) { + const size_t StringsSize = MB.Strings.size(); + + for (const auto &MethodInfo : MB.Methods) { + iJIT_Method_Load MethodMessage; + memset(&MethodMessage, 0, sizeof(iJIT_Method_Load)); + + MethodMessage.method_id = MethodInfo.MethodID; + if (MethodInfo.NameSI != 0 && MethodInfo.NameSI < StringsSize) { + MethodMessage.method_name = + const_cast(MB.Strings.at(MethodInfo.NameSI).data()); + } else { + MethodMessage.method_name = NULL; + } + if (MethodInfo.ClassFileSI != 0 && MethodInfo.ClassFileSI < StringsSize) { + MethodMessage.class_file_name = + const_cast(MB.Strings.at(MethodInfo.ClassFileSI).data()); + } else { + MethodMessage.class_file_name = NULL; + } + if (MethodInfo.SourceFileSI != 0 && MethodInfo.SourceFileSI < StringsSize) { + MethodMessage.source_file_name = + const_cast(MB.Strings.at(MethodInfo.SourceFileSI).data()); + } else { + MethodMessage.source_file_name = NULL; + } + + MethodMessage.method_load_address = MethodInfo.LoadAddr.toPtr(); + MethodMessage.method_size = MethodInfo.LoadSize; + MethodMessage.class_id = 0; + + MethodMessage.user_data = NULL; + 
MethodMessage.user_data_size = 0; + MethodMessage.env = iJDE_JittingAPI; + + std::vector LineInfo; + for (const auto &LInfo : MethodInfo.LineTable) { + LineInfo.push_back(LineNumberInfo{LInfo.first, LInfo.second}); + } + + if (LineInfo.size() == 0) { + MethodMessage.line_number_size = 0; + MethodMessage.line_number_table = 0; + } else { + MethodMessage.line_number_size = LineInfo.size(); + MethodMessage.line_number_table = &*LineInfo.begin(); + } + JITEventWrapper::Wrapper->iJIT_NotifyEvent( + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &MethodMessage); + } + + return Error::success(); +} + +static void registerJITLoaderVTuneUnregisterImpl( + const std::vector> &UM) { + for (auto &Method : UM) { + JITEventWrapper::Wrapper->iJIT_NotifyEvent( + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + const_cast(&Method.first)); + } +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + if (!JITEventWrapper::Wrapper) + JITEventWrapper::Wrapper.reset(new IntelJITEventsWrapper); + + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneRegisterImpl) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneUnregisterImpl) + .release(); +} + +// For Testing: following code comes from llvm-jitlistener.cpp in llvm tools +namespace { +using SourceLocations = std::vector>; +using NativeCodeMap = std::map; +NativeCodeMap ReportedDebugFuncs; +} // namespace + +static int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { + switch (EventType) { + case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: { + if (!EventSpecificData) { + errs() << "Error: The JIT event listener did not provide a event data."; + return -1; + } + iJIT_Method_Load *msg = static_cast(EventSpecificData); + + 
ReportedDebugFuncs[msg->method_id]; + + outs() << "Method load [" << msg->method_id << "]: " << msg->method_name + << ", Size = " << msg->method_size << "\n"; + + for (unsigned int i = 0; i < msg->line_number_size; ++i) { + if (!msg->line_number_table) { + errs() << "A function with a non-zero line count had no line table."; + return -1; + } + std::pair loc( + std::string(msg->source_file_name), + msg->line_number_table[i].LineNumber); + ReportedDebugFuncs[msg->method_id].push_back(loc); + outs() << " Line info @ " << msg->line_number_table[i].Offset << ": " + << msg->source_file_name << ", line " + << msg->line_number_table[i].LineNumber << "\n"; + } + outs() << "\n"; + } break; + case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: { + if (!EventSpecificData) { + errs() << "Error: The JIT event listener did not provide a event data."; + return -1; + } + unsigned int UnloadId = + *reinterpret_cast(EventSpecificData); + assert(1 == ReportedDebugFuncs.erase(UnloadId)); + outs() << "Method unload [" << UnloadId << "]\n"; + } break; + default: + break; + } + return 0; +} + +static iJIT_IsProfilingActiveFlags IsProfilingActive(void) { + // for testing, pretend we have an Intel Parallel Amplifier XE 2011 + // instance attached + return iJIT_SAMPLING_ON; +} + +static unsigned int GetNewMethodID(void) { + static unsigned int id = 0; + return ++id; +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + JITEventWrapper::Wrapper.reset(new IntelJITEventsWrapper( + NotifyEvent, NULL, NULL, IsProfilingActive, 0, 0, GetNewMethodID)); + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneRegisterImpl) + .release(); +} + +#else + +using namespace llvm; +using namespace llvm::orc; + +static Error unsupportedBatch(const VTuneMethodBatch &MB) { + return llvm::make_error("unsupported for Intel VTune", + inconvertibleErrorCode()); +} + +static void unsuppported(const 
std::vector> &UM) { + +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, unsupportedBatch) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle(Data, Size, + unsuppported) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, unsupportedBatch) + .release(); +} + +#endif diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s new file mode 100644 index 00000000000000..1c95bde51e1211 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s @@ -0,0 +1,52 @@ +# REQUIRES: native && x86_64-linux && intel-jitevents + +# RUN: rm -rf %t && mkdir %t +# RUN: llvm-mc -triple=x86_64-unknown-linux \ +# RUN: -filetype=obj -o %t/ELF_x86-64_vtune.o %s +# RUN: llvm-jitlink -vtune-support %t/ELF_x86-64_vtune.o | \ +# RUN: FileCheck %s + +# CHECK: Method load [0]: {{.*}}, Size = {{[0-9]+}} +# CHECK: Method unload [0] + .file "test.c" + .text + .globl main + .type main, @function +main: +.LFB0: + .cfi_startproc + endbr64 + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + movq %rsp, %rbp + .cfi_def_cfa_register 6 + movl %edi, -4(%rbp) + movq %rsi, -16(%rbp) + movl -4(%rbp), %ebx + addl $1, %ebx + movl $0, %eax + popq %rbp + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE0: + .size main, .-main + .ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0" + .section .note.GNU-stack,"",@progbits + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: 
+ .long 0x3 +3: + .align 8 +4: diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg index 42bf50dcc13c35..d5a1ad626b657d 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg @@ -1,2 +1,5 @@ if not "X86" in config.root.targets: config.unsupported = True + +if config.llvm_use_intel_jitevents: + config.available_features.add("intel-jitevents") diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index f0b8310a32efd3..09b2a5900eb0b7 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -21,6 +21,7 @@ #include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h" #include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h" #include "llvm/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.h" +#include "llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h" #include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h" #include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" @@ -34,6 +35,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -148,6 +150,10 @@ static cl::opt PerfSupport("perf-support", cl::init(false), cl::Hidden, cl::cat(JITLinkCategory)); +static cl::opt VTuneSupport("vtune-support", + cl::desc("Enable vtune profiling support"), + cl::init(false), cl::Hidden, + cl::cat(JITLinkCategory)); static cl::opt NoProcessSymbols("no-process-syms", cl::desc("Do not resolve to llvm-jitlink process symbols"), @@ -264,7 
+270,10 @@ static LLVM_ATTRIBUTE_USED void linkComponents() { << (void *)&llvm_orc_registerJITLoaderGDBAllocAction << '\n' << (void *)&llvm_orc_registerJITLoaderPerfStart << '\n' << (void *)&llvm_orc_registerJITLoaderPerfEnd << '\n' - << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n'; + << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n' + << (void *)&llvm_orc_registerVTuneImpl << '\n' + << (void *)&llvm_orc_unregisterVTuneImpl << '\n' + << (void *)&llvm_orc_test_registerVTuneImpl << '\n'; } static bool UseTestResultOverride = false; @@ -1004,6 +1013,14 @@ Session::Session(std::unique_ptr EPC, Error &Err) this->ES.getExecutorProcessControl(), *ProcessSymsJD, true, true))); } + if (VTuneSupport && TT.isOSBinFormatELF()) { + ObjLayer.addPlugin(ExitOnErr(DebugInfoPreservationPlugin::Create())); + ObjLayer.addPlugin(ExitOnErr( + VTuneSupportPlugin::Create(this->ES.getExecutorProcessControl(), + *ProcessSymsJD, /*EmitDebugInfo=*/true, + /*TestMode=*/true))); + } + // Set up the platform. 
if (!OrcRuntime.empty()) { assert(ProcessSymsJD && "ProcessSymsJD should have been set"); From d64632becd159a4b816af2d5a01fd3531bd45f65 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Mar 2024 19:18:25 +0000 Subject: [PATCH 082/158] [gn build] Port 00f412168cf6 --- .../gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn | 1 + .../llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn index 1d3fc6cfdfaaa6..5610679ff333ee 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn @@ -13,6 +13,7 @@ static_library("Debugging") { "DebuggerSupportPlugin.cpp", "LLJITUtilsCBindings.cpp", "PerfSupportPlugin.cpp", + "VTuneSupportPlugin.cpp", ] if (current_os == "linux") { libs = [ "rt" ] diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn index f34855e8e1cc60..d62f5042c94638 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn @@ -9,6 +9,7 @@ static_library("TargetProcess") { "ExecutorSharedMemoryMapperService.cpp", "JITLoaderGDB.cpp", "JITLoaderPerf.cpp", + "JITLoaderVTune.cpp", "OrcRTBootstrap.cpp", "RegisterEHFrames.cpp", "SimpleExecutorDylibManager.cpp", From 4d31fbbb5af6528387fd5efd90363a408713108b Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 7 Mar 2024 11:07:59 -0800 Subject: [PATCH 083/158] [ORC] Propagate defineMaterializing failure when resource tracker is defunct. 
Remove an overly aggressive cantFail: This call to defineMaterializing should never fail with a duplicate symbols error (since all new symbols shoul be weak), but may fail if the tracker has become defunct in the mean time. In that case we need to propagate the error. --- llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index fffa95ee72b719..6ac256dff9b436 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -455,9 +455,10 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { ProcessSymbol(Sym); // Attempt to claim all weak defs that we're not already responsible for. - // This cannot fail -- any clashes will just result in rejection of our - // claim, at which point we'll externalize that symbol. - cantFail(MR->defineMaterializing(std::move(NewSymbolsToClaim))); + // This may fail if the resource tracker has become defunct, but should + // always succeed otherwise. + if (auto Err = MR->defineMaterializing(std::move(NewSymbolsToClaim))) + return Err; // Walk the list of symbols that we just tried to claim. Symbols that we're // responsible for are marked live. Symbols that we're not responsible for From 9286665f7667a00062ef7bb8d1eec0fa02d2602c Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 7 Mar 2024 11:43:33 -0800 Subject: [PATCH 084/158] AMDGPU: Use OtherPredicates for v_dot2_bf16_bf16(f16_f16) pseudo (#84354) This is because SubtargetPredicate is not copied from pseudo to dpp16 and dpp8 real. Actually this is the common issue for insts with _Realtriple_ --- We should avoid using SubtargetPredicate to define pseudo: the predicate will be lost in real. 
--- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 334cfad478f151..3340ded9d36000 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -893,7 +893,7 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let SubtargetPredicate = HasDot9Insts, IsDOT=1 in { +let OtherPredicates = [HasDot9Insts], IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile, int_amdgcn_fdot2_bf16_bf16>; } From b8b434b3e150f1c79b114893e36f8e447e560b80 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 7 Mar 2024 20:46:10 +0100 Subject: [PATCH 085/158] [GISEL] Silence unused variable warning. 
NFC --- llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index a5a136e2effc60..28e5bf85ca9ce6 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -744,8 +744,7 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, const SrcOp &Src) { - LLT DstTy = Res.getLLTTy(*getMRI()); - assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + assert(Src.getLLTTy(*getMRI()) == Res.getLLTTy(*getMRI()).getElementType() && "Expected Src to match Dst elt ty"); return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); } From 4f85f620b6a92e657dc9f719158dbdcae561ead7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 7 Mar 2024 20:54:02 +0100 Subject: [PATCH 086/158] [bazel] Port 3714f937b835c06c8c32ca4f3f61ba2317db2296 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 9d6ca4ed932fe4..7a6bc2dc320255 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -542,6 +542,7 @@ mlir_c_api_cc_library( includes = ["include"], deps = [ ":LLVMDialect", + "//llvm:Support", ], ) From 57a337378f37fa3813992842714c9b06fae20af2 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 7 Mar 2024 14:57:35 -0500 Subject: [PATCH 087/158] [libc][c23] add memset_explicit (#83577) --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/spec/stdc.td | 5 +++ libc/src/string/CMakeLists.txt | 11 +++++++ libc/src/string/memset_explicit.cpp | 25 +++++++++++++++ libc/src/string/memset_explicit.h | 20 
++++++++++++ libc/test/src/string/CMakeLists.txt | 10 ++++++ libc/test/src/string/memset_explicit_test.cpp | 31 +++++++++++++++++++ 8 files changed, 104 insertions(+) create mode 100644 libc/src/string/memset_explicit.cpp create mode 100644 libc/src/string/memset_explicit.h create mode 100644 libc/test/src/string/memset_explicit_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 06832a41221dd8..c32773f67cda53 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.memset_explicit libc.src.string.rindex libc.src.string.stpcpy libc.src.string.stpncpy diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index bd2006ddb7e985..1f36f127e3c473 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.memset_explicit libc.src.string.rindex libc.src.string.stpcpy libc.src.string.stpncpy diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index a3a856a4ec8507..e09cce0efd9bcc 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -234,6 +234,11 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, + FunctionSpec< + "memset_explicit", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, FunctionSpec< "strcpy", RetValSpec, diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 1c893280e8a3c2..56588ffafb86f0 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -441,6 +441,17 @@ add_entrypoint_object( .memory_utils.inline_memcpy ) +add_entrypoint_object( + memset_explicit + SRCS + memset_explicit.cpp + HDRS + memset_explicit.h + DEPENDS + 
.string_utils + .memory_utils.inline_memset +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp new file mode 100644 index 00000000000000..a8656d1e791e84 --- /dev/null +++ b/libc/src/string/memset_explicit.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of memset_explicit ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memset_explicit.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/inline_memset.h" + +namespace LIBC_NAMESPACE { + +[[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit, + (void *dst, int value, size_t count)) { + // Use the inline memset function to set the memory. + inline_memset(dst, static_cast(value), count); + // avoid dead store elimination + // The asm itself should also be sufficient to behave as a compiler barrier. + asm("" : : "r"(dst) : "memory"); + return dst; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h new file mode 100644 index 00000000000000..f6c189761a123c --- /dev/null +++ b/libc/src/string/memset_explicit.h @@ -0,0 +1,20 @@ +//===-- Implementation header for memset_explicit ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H +#define LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H + +#include // size_t + +namespace LIBC_NAMESPACE { + +[[gnu::noinline]] void *memset_explicit(void *ptr, int value, size_t count); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index 6088289532d771..c1caec5fd912c8 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -418,6 +418,16 @@ add_libc_test( libc.src.string.strxfrm ) +add_libc_test( + memset_explicit_test + SUITE + libc-string-tests + SRCS + memset_explicit_test.cpp + DEPENDS + libc.src.string.memset_explicit +) + # Tests all implementations that can run on the target CPU. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/memset_explicit_test.cpp b/libc/test/src/string/memset_explicit_test.cpp new file mode 100644 index 00000000000000..bb5111bd639e3a --- /dev/null +++ b/libc/test/src/string/memset_explicit_test.cpp @@ -0,0 +1,31 @@ +//===-- Unittests for memset_explicit -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "memory_utils/memory_check_utils.h" +#include "src/string/memset_explicit.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE { + +// Apply the same tests as memset + +static inline void Adaptor(cpp::span p1, uint8_t value, size_t size) { + LIBC_NAMESPACE::memset_explicit(p1.begin(), value, size); +} + +TEST(LlvmLibcmemsetExplicitTest, SizeSweep) { + static constexpr size_t kMaxSize = 400; + Buffer DstBuffer(kMaxSize); + for (size_t size = 0; size < kMaxSize; ++size) { + const char value = size % 10; + auto dst = DstBuffer.span().subspan(0, size); + ASSERT_TRUE((CheckMemset(dst, value, size))); + } +} + +} // namespace LIBC_NAMESPACE From 308a2360725948fd6c77d005110c169ab1a8322c Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Thu, 7 Mar 2024 21:01:46 +0100 Subject: [PATCH 088/158] [clang-tidy] `isOnlyUsedAsConst`: Handle static method calls. (#84005) ... using method syntax: ``` struct S { static void f() }; void DoIt(S& s) { s.f(); // Does not mutate `s` through the `this` parameter. } ``` --- clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp | 9 +++++---- .../unittests/clang-tidy/DeclRefExprUtilsTest.cpp | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp index f0ffa517047b27..a48e45e1356813 100644 --- a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp @@ -155,15 +155,16 @@ AST_MATCHER_P(DeclRefExpr, doesNotMutateObject, int, Indirections) { if (const auto *const Member = dyn_cast(P)) { if (const auto *const Method = dyn_cast(Member->getMemberDecl())) { - if (!Method->isConst()) { - // The method can mutate our variable. 
- return false; + if (Method->isConst() || Method->isStatic()) { + // The method call cannot mutate our variable. + continue; } - continue; + return false; } Stack.emplace_back(Member, 0); continue; } + if (const auto *const Op = dyn_cast(P)) { switch (Op->getOpcode()) { case UO_AddrOf: diff --git a/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp index 4c9e81ea0f61ac..3d9f51e2e17b09 100644 --- a/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp @@ -51,6 +51,8 @@ template void RunTest(StringRef Snippet) { void constMethod() const; void nonConstMethod(); + static void staticMethod(); + void operator()(ConstTag) const; void operator()(NonConstTag); @@ -109,10 +111,12 @@ TEST(ConstReferenceDeclRefExprsTest, ConstValueVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); /*const*/target(ConstTag{}); /*const*/target[42]; useConstRef((/*const*/target)); (/*const*/target).constMethod(); + /*const*/target.staticMethod(); (void)(/*const*/target == /*const*/target); (void)/*const*/target; (void)&/*const*/target; @@ -140,6 +144,7 @@ TEST(ConstReferenceDeclRefExprsTest, ConstRefVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); /*const*/target(ConstTag{}); /*const*/target[42]; useConstRef((/*const*/target)); @@ -179,6 +184,7 @@ TEST(ConstReferenceDeclRefExprsTest, ValueVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); target.nonConstMethod(); /*const*/target(ConstTag{}); target[42]; @@ -218,6 +224,7 @@ TEST(ConstReferenceDeclRefExprsTest, RefVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + 
/*const*/target.staticMethod(); target.nonConstMethod(); /*const*/target(ConstTag{}); target[42]; @@ -256,6 +263,7 @@ TEST(ConstReferenceDeclRefExprsTest, PtrVar) { useConstPtrConstRef(/*const*/target); usePtrConstPtr(&target); /*const*/target->constMethod(); + /*const*/target->staticMethod(); target->nonConstMethod(); (*/*const*/target)(ConstTag{}); (*target)[42]; @@ -292,6 +300,7 @@ TEST(ConstReferenceDeclRefExprsTest, ConstPtrVar) { useConstPtrConstPtr(&/*const*/target); useConstPtrConstRef(/*const*/target); /*const*/target->constMethod(); + /*const*/target->staticMethod(); (*/*const*/target)(ConstTag{}); (*/*const*/target)[42]; /*const*/target->operator[](42); From a10fd16270b6fecf99b793318872e208c8b1abab Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 7 Mar 2024 11:43:35 -0800 Subject: [PATCH 089/158] [CVP] Add test coverage for an upcoming generalization of expandUDivOrURem --- .../udiv-expansion.ll | 68 ++++++++++++++ .../urem-expansion.ll | 93 +++++++++++++++++++ 2 files changed, 161 insertions(+) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll b/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll index a2a767084fbff6..a5fc26ebab00f5 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll @@ -90,6 +90,74 @@ define i8 @constant.divisor.v7(i8 %x) { ret i8 %div } +define i8 @constant.divisor.v6to8(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to8( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 2 +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv 
i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v9to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v9to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 3 +; + %cmp.x.lower = icmp uge i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v12to14(i8 %x) { +; CHECK-LABEL: @constant.divisor.v12to14( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 4 +; + %cmp.x.lower = icmp uge i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 15 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v6to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[DIV]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + ; Both are variable. 
Bounds are known define i8 @variable.v3(i8 %x, i8 %y) { diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll index cd0ba2f189dc83..2af8c8f23bbd3e 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll @@ -100,6 +100,74 @@ define i8 @constant.divisor.v7(i8 %x) { ret i8 %rem } +define i8 @constant.divisor.v6to8(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to8( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v9to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v9to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v12to14(i8 %x) { +; CHECK-LABEL: @constant.divisor.v12to14( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; 
CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 15 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v6to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + ; Both are variable. Bounds are known define i8 @variable.v3(i8 %x, i8 %y) { @@ -226,6 +294,31 @@ define i8 @variable.v7(i8 %x, i8 %y) { ret i8 %rem } +define i8 @variable.v6to8.v3to4(i8 %x, i8 %y) { +; CHECK-LABEL: @variable.v6to8.v3to4( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_LOWER]]) +; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]] +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 8 + call void @llvm.assume(i1 %cmp.x.upper) + %cmp.y.lower = icmp uge i8 %y, 3 + call void @llvm.assume(i1 %cmp.y.lower) + %cmp.y.upper = icmp ule i8 %y, 4 + call void @llvm.assume(i1 %cmp.y.upper) + %rem = urem i8 %x, %y 
+ ret i8 %rem +} + ; Constant divisor define i8 @large.divisor.v0(i8 %x) { From 292a28df6c55679fad0589dea35278a8c66b2ae1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 7 Mar 2024 15:12:21 -0500 Subject: [PATCH 090/158] [libc++] Enable availability based on the compiler instead of __has_extension (#84065) __has_extension(...) doesn't work as intended when -pedantic-errors is used with Clang. With that flag, __has_extension(...) is equivalent to __has_feature(...), which means that checks like __has_extension(pragma_clang_attribute_external_declaration) will return 0. In turn, this has the effect of disabling availability markup in libc++, which is undesirable. rdar://124078119 --- libcxx/include/__availability | 7 +++--- ...lity-with-pedantic-errors.compile.pass.cpp | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp diff --git a/libcxx/include/__availability b/libcxx/include/__availability index 78438c55a3b7ba..bb3ed0a8da521b 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -72,11 +72,10 @@ # endif #endif -// Availability markup is disabled when building the library, or when the compiler +// Availability markup is disabled when building the library, or when a non-Clang +// compiler is used because only Clang supports the necessary attributes. // doesn't support the proper attributes. 
-#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif diff --git a/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp new file mode 100644 index 00000000000000..c55a0a4d6e5d1b --- /dev/null +++ b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: stdlib=apple-libc++ + +// Test that using -pedantic-errors doesn't turn off availability annotations. +// This used to be the case because we used __has_extension(...) to enable the +// availability annotations, and -pedantic-errors changes the behavior of +// __has_extension(...) in an incompatible way. + +// ADDITIONAL_COMPILE_FLAGS: -pedantic-errors + +#include <__availability> + +#if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# error Availability annotations should be enabled on Apple platforms in the system configuration! 
+#endif From d0b702279819fe2fd3b3d2bfa274c895ac49f23b Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:23:37 -0500 Subject: [PATCH 091/158] [libc] Refactor stdfix extension from llvm_libc_ext.td to llvm_libc_stdfix_ext.td. (#84365) This fixes runtime build for armv6 baremetal targets: https://github.com/llvm/llvm-project/pull/83959#issuecomment-1984221249 --- libc/config/baremetal/api.td | 2 +- libc/config/linux/api.td | 1 + libc/spec/llvm_libc_ext.td | 20 -------------------- libc/spec/llvm_libc_stdfix_ext.td | 24 ++++++++++++++++++++++++ 4 files changed, 26 insertions(+), 21 deletions(-) create mode 100644 libc/spec/llvm_libc_stdfix_ext.td diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td index 008eb45386f242..33b3a03828e9c7 100644 --- a/libc/config/baremetal/api.td +++ b/libc/config/baremetal/api.td @@ -2,7 +2,7 @@ include "config/public_api.td" include "spec/stdc.td" include "spec/stdc_ext.td" -include "spec/llvm_libc_ext.td" +include "spec/llvm_libc_stdfix_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 526fd03f94f6a5..75432a2a298652 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -7,6 +7,7 @@ include "spec/gnu_ext.td" include "spec/bsd_ext.td" include "spec/stdc_ext.td" include "spec/llvm_libc_ext.td" +include "spec/llvm_libc_stdfix_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td index 3241ec0550376b..ca61d4ef371a2e 100644 --- a/libc/spec/llvm_libc_ext.td +++ b/libc/spec/llvm_libc_ext.td @@ -51,29 +51,9 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> { ] >; - HeaderSpec StdFix = HeaderSpec< - "stdfix.h", - [], // macros - [], // types - [], // enums - [ // functions - GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtur", 
RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"uksqrtui", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - ] - >; - let Headers = [ Assert, Sched, - StdFix, Strings, ]; } diff --git a/libc/spec/llvm_libc_stdfix_ext.td b/libc/spec/llvm_libc_stdfix_ext.td new file mode 100644 index 00000000000000..75bde47810a6be --- /dev/null +++ b/libc/spec/llvm_libc_stdfix_ext.td @@ -0,0 +1,24 @@ +def LLVMLibcStdfixExt : StandardSpec<"llvm_libc_stdfix_ext"> { + HeaderSpec StdFix = HeaderSpec< + "stdfix.h", + [], // macros + [], // types + [], // enums + [ // functions + GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"uksqrtui", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + ] + >; + + let Headers = [ + StdFix, + ]; +} From a41226b05510a6f40d99fc622d78853460dc5599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= Date: Thu, 7 Mar 2024 21:32:49 +0100 Subject: [PATCH 
092/158] [ValueTracking] Fix KnownBits conflict for calls (range vs returned) (#84353) If a function only exits for certain input values we can still derive that an argument is "returned". We can also derive range metadata that describe the possible value range returned by the function. However, it turns out that those two analyses can result in conflicting information. Example: declare i16 @foo(i16 returned) ... %A = call i16 @foo(i16 4095), !range !{i16 32, i16 33} To avoid "Bits known to be one AND zero?" assertion failures we now make sure to discard the known bits for this kind of scenario. --- llvm/lib/Analysis/ValueTracking.cpp | 6 ++++++ llvm/unittests/Analysis/ValueTrackingTest.cpp | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 52ae9f034e5d34..6d0e79e11eed43 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1476,6 +1476,12 @@ static void computeKnownBitsFromOperator(const Operator *I, if (RV->getType() == I->getType()) { computeKnownBits(RV, Known2, Depth + 1, Q); Known = Known.unionWith(Known2); + // If the function doesn't return properly for all input values + // (e.g. unreachable exits) then there might be conflicts between the + // argument value and the range metadata. Simply discard the known bits + // in case of conflicts. 
+ if (Known.hasConflict()) + Known.resetAll(); } } if (const IntrinsicInst *II = dyn_cast(I)) { diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 9e0abe7a16df98..6c6897d83a256e 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2359,6 +2359,20 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsFreeze) { EXPECT_EQ(Known.One.getZExtValue(), 0u); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsReturnedRangeConflict) { + parseAssembly( + "declare i16 @foo(i16 returned)\n" + "\n" + "define i16 @test() {\n" + " %A = call i16 @foo(i16 4095), !range !{i16 32, i16 33}\n" + " ret i16 %A\n" + "}\n"); + // The call returns 32 according to range metadata, but 4095 according to the + // returned arg operand. Given the conflicting information we expect that the + // known bits information simply is cleared. + expectKnownBits(/*zero*/ 0u, /*one*/ 0u); +} + TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRange) { parseAssembly("define void @test(ptr %p) {\n" " %A = load i64, ptr %p, !range !{i64 64, i64 65536}\n" From 458636690afdd223ffa72f49164f30449b588892 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <jonas@devlieghere.com> Date: Thu, 7 Mar 2024 12:41:06 -0800 Subject: [PATCH 093/158] [lldb] Do some gardening in ProgressReportTest (NFC) (#84278) - Factor out common setup code. - Split the ProgressManager test into separate tests as they test separate things. - Fix usage of EXPECT (which continues on failure) and ASSERT (which halts on failure). We must use the latter when calling GetEvent as otherwise we'll try to dereference a null EventSP. 
--- lldb/unittests/Core/ProgressReportTest.cpp | 199 ++++++++++----------- 1 file changed, 96 insertions(+), 103 deletions(-) diff --git a/lldb/unittests/Core/ProgressReportTest.cpp b/lldb/unittests/Core/ProgressReportTest.cpp index e0253cbc4ec59b..1f993180fd8392 100644 --- a/lldb/unittests/Core/ProgressReportTest.cpp +++ b/lldb/unittests/Core/ProgressReportTest.cpp @@ -22,9 +22,29 @@ using namespace lldb; using namespace lldb_private; +static std::chrono::milliseconds TIMEOUT(100); + class ProgressReportTest : public ::testing::Test { - SubsystemRAII subsystems; +public: + ListenerSP CreateListenerFor(uint32_t bit) { + // Set up the debugger, make sure that was done properly. + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform( + PlatformRemoteMacOSX::CreateInstance(true, &arch)); + + m_debugger_sp = Debugger::CreateInstance(); + + // Get the debugger's broadcaster. + Broadcaster &broadcaster = m_debugger_sp->GetBroadcaster(); + + // Create a listener, make sure it can receive events and that it's + // listening to the correct broadcast bit. + m_listener_sp = Listener::MakeListener("progress-listener"); + m_listener_sp->StartListeningForEvents(&broadcaster, bit); + return m_listener_sp; + } +protected: // The debugger's initialization function can't be called with no arguments // so calling it using SubsystemRAII will cause the test build to fail as // SubsystemRAII will call Initialize with no arguments. As such we set it up @@ -33,30 +53,14 @@ class ProgressReportTest : public ::testing::Test { std::call_once(TestUtilities::g_debugger_initialize_flag, []() { Debugger::Initialize(nullptr); }); }; + + DebuggerSP m_debugger_sp; + ListenerSP m_listener_sp; + SubsystemRAII subsystems; }; TEST_F(ProgressReportTest, TestReportCreation) { - std::chrono::milliseconds timeout(100); - - // Set up the debugger, make sure that was done properly. 
- ArchSpec arch("x86_64-apple-macosx-"); - Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); - - DebuggerSP debugger_sp = Debugger::CreateInstance(); - ASSERT_TRUE(debugger_sp); - - // Get the debugger's broadcaster. - Broadcaster &broadcaster = debugger_sp->GetBroadcaster(); - - // Create a listener, make sure it can receive events and that it's - // listening to the correct broadcast bit. - ListenerSP listener_sp = Listener::MakeListener("progress-listener"); - - listener_sp->StartListeningForEvents(&broadcaster, - Debugger::eBroadcastBitProgress); - EXPECT_TRUE( - broadcaster.EventTypeHasListeners(Debugger::eBroadcastBitProgress)); - + ListenerSP listener_sp = CreateListenerFor(Debugger::eBroadcastBitProgress); EventSP event_sp; const ProgressEventData *data; @@ -73,82 +77,64 @@ TEST_F(ProgressReportTest, TestReportCreation) { // in this order: // Starting progress: 1, 2, 3 // Ending progress: 3, 2, 1 - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 1"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); + EXPECT_EQ(data->GetDetails(), "Starting report 1"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 2"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), 
Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + EXPECT_EQ(data->GetDetails(), "Starting report 2"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 3"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + + EXPECT_EQ(data->GetDetails(), "Starting report 3"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); // Progress report objects should be destroyed at this point so // get each report from the queue and check that they've been // destroyed in reverse order. 
- EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 3"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + EXPECT_EQ(data->GetTitle(), "Progress report 3"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 2"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + EXPECT_EQ(data->GetTitle(), "Progress report 2"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 1"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); + EXPECT_EQ(data->GetTitle(), "Progress report 1"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); } TEST_F(ProgressReportTest, TestProgressManager) { - std::chrono::milliseconds timeout(100); - - // Set up the debugger, make sure that was done properly. 
- ArchSpec arch("x86_64-apple-macosx-"); - Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); - - DebuggerSP debugger_sp = Debugger::CreateInstance(); - ASSERT_TRUE(debugger_sp); - - // Get the debugger's broadcaster. - Broadcaster &broadcaster = debugger_sp->GetBroadcaster(); - - // Create a listener, make sure it can receive events and that it's - // listening to the correct broadcast bit. - ListenerSP listener_sp = Listener::MakeListener("progress-category-listener"); - - listener_sp->StartListeningForEvents(&broadcaster, - Debugger::eBroadcastBitProgressCategory); - EXPECT_TRUE(broadcaster.EventTypeHasListeners( - Debugger::eBroadcastBitProgressCategory)); - + ListenerSP listener_sp = + CreateListenerFor(Debugger::eBroadcastBitProgressCategory); EventSP event_sp; const ProgressEventData *data; @@ -160,28 +146,35 @@ TEST_F(ProgressReportTest, TestProgressManager) { Progress progress1("Progress report 1", "Starting report 1"); Progress progress2("Progress report 1", "Starting report 2"); Progress progress3("Progress report 1", "Starting report 3"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); - EXPECT_FALSE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + ASSERT_FALSE(listener_sp->GetEvent(event_sp, TIMEOUT)); } data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1"); // Pop another event from the queue, this should be the event for the final // report for this category. 
- EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); - + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1"); + + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1"); +} + +TEST_F(ProgressReportTest, TestOverlappingEvents) { + ListenerSP listener_sp = + CreateListenerFor(Debugger::eBroadcastBitProgressCategory); + EventSP event_sp; + const ProgressEventData *data; // Create two progress reports of the same category that overlap with each // other. Here we want to ensure that the ID broadcasted for the initial and @@ -192,28 +185,28 @@ TEST_F(ProgressReportTest, TestProgressManager) { std::make_unique("Overlapping report 1", "Starting report 2"); overlap_progress1.reset(); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); // Get the ID used in the first report for this category. 
uint64_t expected_progress_id = data->GetID(); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Overlapping report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Overlapping report 1"); overlap_progress2.reset(); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Overlapping report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Overlapping report 1"); // The progress ID for the final report should be the same as that for the // initial report. - ASSERT_EQ(data->GetID(), expected_progress_id); + EXPECT_EQ(data->GetID(), expected_progress_id); } From 11185715a28c6592ca6fe247fe693b305c85627a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 7 Mar 2024 12:44:25 -0800 Subject: [PATCH 094/158] Revert "[SLP]Improve minbitwidth analysis." This reverts commit 4ce52e2d576937fe930294cae883a0daa17eeced to fix issues detected by https://lab.llvm.org/buildbot/#/builders/74/builds/26470/steps/12/logs/stdio. 
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 634 ++++++------------ .../SLPVectorizer/AArch64/ext-trunc.ll | 9 +- .../SLPVectorizer/AArch64/getelementptr2.ll | 4 +- .../SLPVectorizer/AArch64/reduce-add-i64.ll | 20 +- .../SLPVectorizer/RISCV/reductions.ll | 7 +- .../Transforms/SLPVectorizer/X86/PR35777.ll | 9 +- .../X86/int-bitcast-minbitwidth.ll | 2 +- ...minbitwidth-multiuse-with-insertelement.ll | 17 +- .../X86/minbitwidth-transformed-operand.ll | 21 +- .../SLPVectorizer/X86/minimum-sizes.ll | 43 +- .../SLPVectorizer/X86/phi-undef-input.ll | 24 +- .../Transforms/SLPVectorizer/X86/resched.ll | 32 +- .../X86/reused-reductions-with-minbitwidth.ll | 10 +- .../X86/store-insertelement-minbitwidth.ll | 22 +- .../SLPVectorizer/alt-cmp-vectorize.ll | 4 +- 15 files changed, 305 insertions(+), 553 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1889bc09e85028..36dc9094538ae9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1085,9 +1085,6 @@ class BoUpSLP { BS->clear(); } MinBWs.clear(); - ReductionBitWidth = 0; - CastMaxMinBWSizes.reset(); - TruncNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -2290,7 +2287,6 @@ class BoUpSLP { void clearReductionData() { AnalyzedReductionsRoots.clear(); AnalyzedReductionVals.clear(); - AnalyzedMinBWVals.clear(); } /// Checks if the given value is gathered in one of the nodes. bool isAnyGathered(const SmallDenseSet &Vals) const { @@ -2311,11 +2307,9 @@ class BoUpSLP { /// constant and to be demoted. Required to correctly identify constant nodes /// to be demoted. 
bool collectValuesToDemote( - Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, - SmallVectorImpl &ToDemote, + Value *V, SmallVectorImpl &ToDemote, DenseMap> &DemotedConsts, - DenseSet &Visited, unsigned &MaxDepthLevel, - bool &IsProfitableToDemote) const; + SmallVectorImpl &Roots, DenseSet &Visited) const; /// Check if the operands on the edges \p Edges of the \p UserTE allows /// reordering (i.e. the operands can be reordered because they have only one @@ -2381,10 +2375,6 @@ class BoUpSLP { /// \ returns the graph entry for the \p Idx operand of the \p E entry. const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const; - /// \returns Cast context for the given graph node. - TargetTransformInfo::CastContextHint - getCastContextHint(const TreeEntry &TE) const; - /// \returns the cost of the vectorizable entry. InstructionCost getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, @@ -2935,18 +2925,11 @@ class BoUpSLP { } assert(!BundleMember && "Bundle and VL out of sync"); } else { + MustGather.insert(VL.begin(), VL.end()); // Build a map for gathered scalars to the nodes where they are used. - bool AllConstsOrCasts = true; for (Value *V : VL) - if (!isConstant(V)) { - auto *I = dyn_cast(V); - AllConstsOrCasts &= I && I->getType()->isIntegerTy(); + if (!isConstant(V)) ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last); - } - if (AllConstsOrCasts) - CastMaxMinBWSizes = - std::make_pair(std::numeric_limits::max(), 1); - MustGather.insert(VL.begin(), VL.end()); } if (UserTreeIdx.UserTE) @@ -3071,10 +3054,6 @@ class BoUpSLP { /// Set of hashes for the list of reduction values already being analyzed. DenseSet AnalyzedReductionVals; - /// Values, already been analyzed for mininmal bitwidth and found to be - /// non-profitable. - DenseSet AnalyzedMinBWVals; - /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). 
External User /// can be nullptr, it means that this Internal Scalar will be used later, @@ -3650,18 +3629,6 @@ class BoUpSLP { /// value must be signed-extended, rather than zero-extended, back to its /// original width. DenseMap> MinBWs; - - /// Final size of the reduced vector, if the current graph represents the - /// input for the reduction and it was possible to narrow the size of the - /// reduction. - unsigned ReductionBitWidth = 0; - - /// If the tree contains any zext/sext/trunc nodes, contains max-min pair of - /// type sizes, used in the tree. - std::optional> CastMaxMinBWSizes; - - /// Indices of the vectorized trunc nodes. - DenseSet TruncNodes; }; } // end namespace slpvectorizer @@ -6572,29 +6539,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - auto [PrevMaxBW, PrevMinBW] = CastMaxMinBWSizes.value_or( - std::make_pair(std::numeric_limits::min(), - std::numeric_limits::max())); - if (ShuffleOrOp == Instruction::ZExt || - ShuffleOrOp == Instruction::SExt) { - CastMaxMinBWSizes = std::make_pair( - std::max(DL->getTypeSizeInBits(VL0->getType()), - PrevMaxBW), - std::min( - DL->getTypeSizeInBits(VL0->getOperand(0)->getType()), - PrevMinBW)); - } else if (ShuffleOrOp == Instruction::Trunc) { - CastMaxMinBWSizes = std::make_pair( - std::max( - DL->getTypeSizeInBits(VL0->getOperand(0)->getType()), - PrevMaxBW), - std::min(DL->getTypeSizeInBits(VL0->getType()), - PrevMinBW)); - TruncNodes.insert(VectorizableTree.size()); - } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); TE->setOperandsInOrder(); @@ -8416,22 +8362,6 @@ const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E, return It->get(); } -TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const { - if (TE.State == TreeEntry::ScatterVectorize || - TE.State == 
TreeEntry::StridedVectorize) - return TTI::CastContextHint::GatherScatter; - if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::Load && - !TE.isAltShuffle()) { - if (TE.ReorderIndices.empty()) - return TTI::CastContextHint::Normal; - SmallVector Mask; - inversePermutation(TE.ReorderIndices, Mask); - if (ShuffleVectorInst::isReverseMask(Mask, Mask.size())) - return TTI::CastContextHint::Reversed; - } - return TTI::CastContextHint::None; -} - InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, SmallPtrSetImpl &CheckedExtracts) { @@ -8454,7 +8384,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // If we have computed a smaller type for the expression, update VecTy so // that the costs will be accurate. auto It = MinBWs.find(E); - Type *OrigScalarTy = ScalarTy; if (It != MinBWs.end()) { ScalarTy = IntegerType::get(F->getContext(), It->second.first); VecTy = FixedVectorType::get(ScalarTy, VL.size()); @@ -8512,11 +8441,24 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, UsedScalars.set(I); } auto GetCastContextHint = [&](Value *V) { - if (const TreeEntry *OpTE = getTreeEntry(V)) - return getCastContextHint(*OpTE); - InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); - if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) - return TTI::CastContextHint::GatherScatter; + if (const TreeEntry *OpTE = getTreeEntry(V)) { + if (OpTE->State == TreeEntry::ScatterVectorize || + OpTE->State == TreeEntry::StridedVectorize) + return TTI::CastContextHint::GatherScatter; + if (OpTE->State == TreeEntry::Vectorize && + OpTE->getOpcode() == Instruction::Load && !OpTE->isAltShuffle()) { + if (OpTE->ReorderIndices.empty()) + return TTI::CastContextHint::Normal; + SmallVector Mask; + inversePermutation(OpTE->ReorderIndices, Mask); + if (ShuffleVectorInst::isReverseMask(Mask, Mask.size())) + return TTI::CastContextHint::Reversed; + } + } else { + 
InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); + if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) + return TTI::CastContextHint::GatherScatter; + } return TTI::CastContextHint::None; }; auto GetCostDiff = @@ -8565,6 +8507,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTI::CastContextHint CCH = GetCastContextHint(VL0); VecCost += TTI->getCastInstrCost(VecOpcode, UserVecTy, VecTy, CCH, CostKind); + ScalarCost += Sz * TTI->getCastInstrCost(VecOpcode, UserScalarTy, + ScalarTy, CCH, CostKind); } } } @@ -8581,7 +8525,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost ScalarCost = 0; InstructionCost VecCost = 0; std::tie(ScalarCost, VecCost) = getGEPCosts( - *TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, OrigScalarTy, VecTy); + *TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, ScalarTy, VecTy); LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost, "Calculated GEPs cost for Tree")); @@ -8628,7 +8572,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, NumElts = ATy->getNumElements(); else NumElts = AggregateTy->getStructNumElements(); - SrcVecTy = FixedVectorType::get(OrigScalarTy, NumElts); + SrcVecTy = FixedVectorType::get(ScalarTy, NumElts); } if (I->hasOneUse()) { Instruction *Ext = I->user_back(); @@ -8796,7 +8740,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, } } auto GetScalarCost = [&](unsigned Idx) -> InstructionCost { - auto *VI = cast(UniqueValues[Idx]); + // Do not count cost here if minimum bitwidth is in effect and it is just + // a bitcast (here it is just a noop). + if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast) + return TTI::TCC_Free; + auto *VI = VL0->getOpcode() == Opcode + ? 
cast(UniqueValues[Idx]) + : nullptr; return TTI->getCastInstrCost(Opcode, VL0->getType(), VL0->getOperand(0)->getType(), TTI::getCastContextHint(VI), CostKind, VI); @@ -8839,7 +8789,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ? CmpInst::BAD_FCMP_PREDICATE : CmpInst::BAD_ICMP_PREDICATE; - return TTI->getCmpSelInstrCost(E->getOpcode(), OrigScalarTy, + return TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), CurrentPred, CostKind, VI); }; @@ -8894,7 +8844,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(VI->getOperand(OpIdx)); SmallVector Operands(VI->operand_values()); - return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind, + return TTI->getArithmeticInstrCost(ShuffleOrOp, ScalarTy, CostKind, Op1Info, Op2Info, Operands, VI); }; auto GetVectorCost = [=](InstructionCost CommonCost) { @@ -8913,9 +8863,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, case Instruction::Load: { auto GetScalarCost = [&](unsigned Idx) { auto *VI = cast(UniqueValues[Idx]); - return TTI->getMemoryOpCost(Instruction::Load, OrigScalarTy, - VI->getAlign(), VI->getPointerAddressSpace(), - CostKind, TTI::OperandValueInfo(), VI); + return TTI->getMemoryOpCost(Instruction::Load, ScalarTy, VI->getAlign(), + VI->getPointerAddressSpace(), CostKind, + TTI::OperandValueInfo(), VI); }; auto *LI0 = cast(VL0); auto GetVectorCost = [&](InstructionCost CommonCost) { @@ -8958,9 +8908,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetScalarCost = [=](unsigned Idx) { auto *VI = cast(VL[Idx]); TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getValueOperand()); - return TTI->getMemoryOpCost(Instruction::Store, OrigScalarTy, - VI->getAlign(), VI->getPointerAddressSpace(), - CostKind, OpInfo, VI); + return TTI->getMemoryOpCost(Instruction::Store, ScalarTy, VI->getAlign(), + VI->getPointerAddressSpace(), CostKind, + 
OpInfo, VI); }; auto *BaseSI = cast(IsReorder ? VL[E->ReorderIndices.front()] : VL0); @@ -9822,44 +9772,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { Cost -= InsertCost; } - // Add the cost for reduced value resize (if required). - if (ReductionBitWidth != 0) { - assert(UserIgnoreList && "Expected reduction tree."); - const TreeEntry &E = *VectorizableTree.front().get(); - auto It = MinBWs.find(&E); - if (It != MinBWs.end() && It->second.first != ReductionBitWidth) { - unsigned SrcSize = It->second.first; - unsigned DstSize = ReductionBitWidth; - unsigned Opcode = Instruction::Trunc; - if (SrcSize < DstSize) - Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt; - auto *SrcVecTy = - FixedVectorType::get(Builder.getIntNTy(SrcSize), E.getVectorFactor()); - auto *DstVecTy = - FixedVectorType::get(Builder.getIntNTy(DstSize), E.getVectorFactor()); - TTI::CastContextHint CCH = getCastContextHint(E); - InstructionCost CastCost; - switch (E.getOpcode()) { - case Instruction::SExt: - case Instruction::ZExt: - case Instruction::Trunc: { - const TreeEntry *OpTE = getOperandEntry(&E, 0); - CCH = getCastContextHint(*OpTE); - break; - } - default: - break; - } - CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH, - TTI::TCK_RecipThroughput); - Cost += CastCost; - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost - << " for final resize for reduction from " << SrcVecTy - << " to " << DstVecTy << "\n"; - dbgs() << "SLP: Current total cost = " << Cost << "\n"); - } - } - #ifndef NDEBUG SmallString<256> Str; { @@ -10080,30 +9992,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // tree node for each gathered value - we have just a permutation of the // single vector. If we have 2 different sets, we're in situation where we // have a permutation of 2 input vectors. - // Filter out entries with larger bitwidth of elements. 
- Type *ScalarTy = VL.front()->getType(); - unsigned BitWidth = 0; - if (ScalarTy->isIntegerTy()) { - // Check if the used TEs supposed to be resized and choose the best - // candidates. - BitWidth = DL->getTypeStoreSize(ScalarTy); - if (TEUseEI.UserTE->getOpcode() != Instruction::Select || - TEUseEI.EdgeIdx != 0) { - auto UserIt = MinBWs.find(TEUseEI.UserTE); - if (UserIt != MinBWs.end()) - BitWidth = UserIt->second.second; - } - } - auto CheckBitwidth = [&](const TreeEntry &TE) { - Type *ScalarTy = TE.Scalars.front()->getType(); - if (!ScalarTy->isIntegerTy()) - return true; - unsigned TEBitWidth = DL->getTypeStoreSize(ScalarTy); - auto UserIt = MinBWs.find(TEUseEI.UserTE); - if (UserIt != MinBWs.end()) - TEBitWidth = UserIt->second.second; - return BitWidth == TEBitWidth; - }; SmallVector> UsedTEs; DenseMap UsedValuesEntry; for (Value *V : VL) { @@ -10138,8 +10026,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; } - if (!CheckBitwidth(*TEPtr)) - continue; // Check if the user node of the TE comes after user node of TEPtr, // otherwise TEPtr depends on TE. if ((TEInsertBlock != InsertPt->getParent() || @@ -10156,8 +10042,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; VTE = *It->getSecond().begin(); // Iterate through all vectorized nodes. - auto *MIt = find_if(It->getSecond(), [&](const TreeEntry *MTE) { - return MTE->State == TreeEntry::Vectorize && CheckBitwidth(*MTE); + auto *MIt = find_if(It->getSecond(), [](const TreeEntry *MTE) { + return MTE->State == TreeEntry::Vectorize; }); if (MIt == It->getSecond().end()) continue; @@ -10167,7 +10053,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( Instruction &LastBundleInst = getLastInstructionInBundle(VTE); if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst)) continue; - if (!CheckBitwidth(*VTE)) + auto It = MinBWs.find(VTE); + // If vectorize node is demoted - do not match. 
+ if (It != MinBWs.end() && + It->second.first != DL->getTypeSizeInBits(V->getType())) continue; VToTEs.insert(VTE); } @@ -13040,21 +12929,7 @@ Value *BoUpSLP::vectorizeTree( Builder.ClearInsertionPoint(); InstrElementSize.clear(); - const TreeEntry &RootTE = *VectorizableTree.front().get(); - Value *Vec = RootTE.VectorizedValue; - if (auto It = MinBWs.find(&RootTE); ReductionBitWidth != 0 && - It != MinBWs.end() && - ReductionBitWidth != It->second.first) { - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(ReductionRoot->getParent(), - ReductionRoot->getIterator()); - Vec = Builder.CreateIntCast( - Vec, - VectorType::get(Builder.getIntNTy(ReductionBitWidth), - cast(Vec->getType())->getElementCount()), - It->second.second); - } - return Vec; + return VectorizableTree[0]->VectorizedValue; } void BoUpSLP::optimizeGatherSequence() { @@ -13874,42 +13749,23 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { // smaller type with a truncation. We collect the values that will be demoted // in ToDemote and additional roots that require investigating in Roots. bool BoUpSLP::collectValuesToDemote( - Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, - SmallVectorImpl &ToDemote, + Value *V, SmallVectorImpl &ToDemote, DenseMap> &DemotedConsts, - DenseSet &Visited, unsigned &MaxDepthLevel, - bool &IsProfitableToDemote) const { + SmallVectorImpl &Roots, DenseSet &Visited) const { // We can always demote constants. - if (isa(V)) { - MaxDepthLevel = 1; + if (isa(V)) return true; - } // If the value is not a vectorized instruction in the expression and not used // by the insertelement instruction and not used in multiple vector nodes, it // cannot be demoted. - // TODO: improve handling of gathered values and others. 
auto *I = dyn_cast(V); - if (!I || !Visited.insert(I).second || !getTreeEntry(I) || - MultiNodeScalars.contains(I) || all_of(I->users(), [&](User *U) { + if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) || + !Visited.insert(I).second || all_of(I->users(), [&](User *U) { return isa(U) && !getTreeEntry(U); })) return false; - auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { - if (MultiNodeScalars.contains(V)) - return false; - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) - return true; - auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT); - unsigned BitWidth1 = OrigBitWidth - NumSignBits; - if (!isKnownNonNegative(V, SimplifyQuery(*DL))) - ++BitWidth1; - BitWidth = std::max(BitWidth, BitWidth1); - return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); - }; unsigned Start = 0; unsigned End = I->getNumOperands(); switch (I->getOpcode()) { @@ -13917,14 +13773,12 @@ bool BoUpSLP::collectValuesToDemote( // We can always demote truncations and extensions. Since truncations can // seed additional demotion, we save the truncated value. 
case Instruction::Trunc: - MaxDepthLevel = 1; - if (IsProfitableToDemoteRoot) - IsProfitableToDemote = true; + Roots.push_back(I->getOperand(0)); break; case Instruction::ZExt: case Instruction::SExt: - MaxDepthLevel = 1; - IsProfitableToDemote = true; + if (isa(I->getOperand(0))) + return false; break; // We can demote certain binary operations if we can demote both of their @@ -13934,32 +13788,23 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::Mul: case Instruction::And: case Instruction::Or: - case Instruction::Xor: { - unsigned Level1, Level2; - if (!collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot, - BitWidth, ToDemote, DemotedConsts, Visited, - Level1, IsProfitableToDemote) || - !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot, - BitWidth, ToDemote, DemotedConsts, Visited, - Level2, IsProfitableToDemote)) + case Instruction::Xor: + if (!collectValuesToDemote(I->getOperand(0), ToDemote, DemotedConsts, Roots, + Visited) || + !collectValuesToDemote(I->getOperand(1), ToDemote, DemotedConsts, Roots, + Visited)) return false; - MaxDepthLevel = std::max(Level1, Level2); break; - } // We can demote selects if we can demote their true and false values. 
case Instruction::Select: { Start = 1; - unsigned Level1, Level2; SelectInst *SI = cast(I); - if (!collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot, - BitWidth, ToDemote, DemotedConsts, Visited, - Level1, IsProfitableToDemote) || - !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot, - BitWidth, ToDemote, DemotedConsts, Visited, - Level2, IsProfitableToDemote)) + if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, DemotedConsts, + Roots, Visited) || + !collectValuesToDemote(SI->getFalseValue(), ToDemote, DemotedConsts, + Roots, Visited)) return false; - MaxDepthLevel = std::max(Level1, Level2); break; } @@ -13968,262 +13813,171 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::PHI: { PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth, - ToDemote, DemotedConsts, Visited, - MaxDepthLevel, IsProfitableToDemote)) + if (!collectValuesToDemote(IncValue, ToDemote, DemotedConsts, Roots, + Visited)) return false; break; } // Otherwise, conservatively give up. default: - return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth); + return false; } - ++MaxDepthLevel; // Gather demoted constant operands. for (unsigned Idx : seq(Start, End)) if (isa(I->getOperand(Idx))) DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); // Record the value that we can demote. ToDemote.push_back(V); - return IsProfitableToDemote; + return true; } void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. 
- bool IsStoreOrInsertElt = - VectorizableTree.front()->getOpcode() == Instruction::Store || - VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && - (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || - CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) + auto &TreeRoot = VectorizableTree[0]->Scalars; + auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); + if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather) return; - unsigned NodeIdx = 0; - if (IsStoreOrInsertElt && - VectorizableTree.front()->State != TreeEntry::NeedToGather) - NodeIdx = 1; - // Ensure the roots of the vectorizable tree don't form a cycle. - if (VectorizableTree[NodeIdx]->State == TreeEntry::NeedToGather || - (NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) || - (NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices, - [NodeIdx](const EdgeInfo &EI) { - return EI.UserTE->Idx > - static_cast(NodeIdx); - }))) - return; - - // The first value node for store/insertelement is sext/zext/trunc? Skip it, - // resize to the final type. - bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt; - if (NodeIdx != 0 && - VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize && - (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt || - VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt || - VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) { - assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph."); - ++NodeIdx; - IsProfitableToDemoteRoot = true; - } - - // Analyzed in reduction already and not profitable - exit. - if (AnalyzedMinBWVals.contains(VectorizableTree[NodeIdx]->Scalars.front())) + if (!VectorizableTree.front()->UserTreeIndices.empty()) return; - SmallVector ToDemote; + // Conservatively determine if we can actually truncate the roots of the + // expression. 
Collect the values that can be demoted in ToDemote and + // additional roots that require investigating in Roots. + SmallVector ToDemote; DenseMap> DemotedConsts; - auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, - bool IsTopRoot, bool IsProfitableToDemoteRoot, - unsigned Opcode, unsigned Limit) { - ToDemote.clear(); - auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); - if (!TreeRootIT || !Opcode) - return 0u; - - if (AnalyzedMinBWVals.contains(TreeRoot.front())) - return 0u; - - unsigned NumParts = TTI->getNumberOfParts( - FixedVectorType::get(TreeRoot.front()->getType(), VF)); - - // The maximum bit width required to represent all the values that can be - // demoted without loss of precision. It would be safe to truncate the roots - // of the expression to this width. - unsigned MaxBitWidth = 1u; - - // True if the roots can be zero-extended back to their original type, - // rather than sign-extended. We know that if the leading bits are not - // demanded, we can safely zero-extend. So we initialize IsKnownPositive to - // True. + SmallVector Roots; + for (auto *Root : TreeRoot) { + DenseSet Visited; + if (!collectValuesToDemote(Root, ToDemote, DemotedConsts, Roots, Visited)) + return; + } + + // The maximum bit width required to represent all the values that can be + // demoted without loss of precision. It would be safe to truncate the roots + // of the expression to this width. + auto MaxBitWidth = 1u; + + // We first check if all the bits of the roots are demanded. If they're not, + // we can truncate the roots to this narrower type. + for (auto *Root : TreeRoot) { + auto Mask = DB->getDemandedBits(cast(Root)); + MaxBitWidth = std::max(Mask.getBitWidth() - Mask.countl_zero(), + MaxBitWidth); + } + + // True if the roots can be zero-extended back to their original type, rather + // than sign-extended. We know that if the leading bits are not demanded, we + // can safely zero-extend. So we initialize IsKnownPositive to True. 
+ bool IsKnownPositive = true; + + // If all the bits of the roots are demanded, we can try a little harder to + // compute a narrower type. This can happen, for example, if the roots are + // getelementptr indices. InstCombine promotes these indices to the pointer + // width. Thus, all their bits are technically demanded even though the + // address computation might be vectorized in a smaller type. + // + // We start by looking at each entry that can be demoted. We compute the + // maximum bit width required to store the scalar by using ValueTracking to + // compute the number of high-order bits we can truncate. + if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) && + all_of(TreeRoot, [](Value *V) { + return all_of(V->users(), + [](User *U) { return isa(U); }); + })) { + MaxBitWidth = 8u; + // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - bool IsKnownPositive = all_of(TreeRoot, [&](Value *R) { + IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); - // We first check if all the bits of the roots are demanded. If they're not, - // we can truncate the roots to this narrower type. - for (auto *Root : TreeRoot) { - unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); - TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()); - unsigned BitWidth1 = NumTypeBits - NumSignBits; - // If we can't prove that the sign bit is zero, we must add one to the - // maximum bit width to account for the unknown sign bit. This preserves - // the existing sign bit so we can safely sign-extend the root back to the - // original type. Otherwise, if we know the sign bit is zero, we will - // zero-extend the root instead. - // - // FIXME: This is somewhat suboptimal, as there will be cases where adding - // one to the maximum bit width will yield a larger-than-necessary - // type. 
In general, we need to add an extra bit only if we can't - // prove that the upper bit of the original type is equal to the - // upper bit of the proposed smaller type. If these two bits are - // the same (either zero or one) we know that sign-extending from - // the smaller type will result in the same value. Here, since we - // can't yet prove this, we are just making the proposed smaller - // type larger to ensure correctness. - if (!IsKnownPositive) - ++BitWidth1; - - APInt Mask = DB->getDemandedBits(cast(Root)); - unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); - MaxBitWidth = - std::max(std::min(BitWidth1, BitWidth2), MaxBitWidth); - } - - if (MaxBitWidth < 8 && MaxBitWidth > 1) - MaxBitWidth = 8; - - // If the original type is large, but reduced type does not improve the reg - // use - ignore it. - if (NumParts > 1 && - NumParts == - TTI->getNumberOfParts(FixedVectorType::get( - IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF))) - return 0u; - - bool IsProfitableToDemote = Opcode == Instruction::Trunc || - Opcode == Instruction::SExt || - Opcode == Instruction::ZExt || NumParts > 1; - // Conservatively determine if we can actually truncate the roots of the - // expression. Collect the values that can be demoted in ToDemote and - // additional roots that require investigating in Roots. 
- for (auto *Root : TreeRoot) { - DenseSet Visited; - unsigned MaxDepthLevel; - bool NeedToDemote = IsProfitableToDemote; - - if (!collectValuesToDemote(Root, IsProfitableToDemoteRoot, MaxBitWidth, - ToDemote, DemotedConsts, Visited, - MaxDepthLevel, NeedToDemote) || - (MaxDepthLevel <= Limit && - !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && - (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || - DL->getTypeSizeInBits(Root->getType()) / - DL->getTypeSizeInBits( - cast(Root)->getOperand(0)->getType()) > - 2)) || - (Opcode == Instruction::Trunc && - (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || - DL->getTypeSizeInBits( - cast(Root)->getOperand(0)->getType()) / - DL->getTypeSizeInBits(Root->getType()) > - 2))))) - return 0u; - } - // Round MaxBitWidth up to the next power-of-two. - MaxBitWidth = bit_ceil(MaxBitWidth); - - return MaxBitWidth; - }; + // Determine the maximum number of bits required to store the scalar + // values. + for (auto *Scalar : ToDemote) { + auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT); + auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); + MaxBitWidth = std::max(NumTypeBits - NumSignBits, MaxBitWidth); + } + + // If we can't prove that the sign bit is zero, we must add one to the + // maximum bit width to account for the unknown sign bit. This preserves + // the existing sign bit so we can safely sign-extend the root back to the + // original type. Otherwise, if we know the sign bit is zero, we will + // zero-extend the root instead. + // + // FIXME: This is somewhat suboptimal, as there will be cases where adding + // one to the maximum bit width will yield a larger-than-necessary + // type. In general, we need to add an extra bit only if we can't + // prove that the upper bit of the original type is equal to the + // upper bit of the proposed smaller type. 
If these two bits are the + // same (either zero or one) we know that sign-extending from the + // smaller type will result in the same value. Here, since we can't + // yet prove this, we are just making the proposed smaller type + // larger to ensure correctness. + if (!IsKnownPositive) + ++MaxBitWidth; + } + + // Round MaxBitWidth up to the next power-of-two. + MaxBitWidth = llvm::bit_ceil(MaxBitWidth); + + // If the maximum bit width we compute is less than the with of the roots' + // type, we can proceed with the narrowing. Otherwise, do nothing. + if (MaxBitWidth >= TreeRootIT->getBitWidth()) + return; // If we can truncate the root, we must collect additional values that might // be demoted as a result. That is, those seeded by truncations we will // modify. - // Add reduction ops sizes, if any. - if (UserIgnoreList && - isa(VectorizableTree.front()->Scalars.front()->getType())) { - for (Value *V : *UserIgnoreList) { - auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT); - auto NumTypeBits = DL->getTypeSizeInBits(V->getType()); - unsigned BitWidth1 = NumTypeBits - NumSignBits; - if (!isKnownNonNegative(V, SimplifyQuery(*DL))) - ++BitWidth1; - auto Mask = DB->getDemandedBits(cast(V)); - unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); - ReductionBitWidth = - std::max(std::min(BitWidth1, BitWidth2), ReductionBitWidth); - } - if (ReductionBitWidth < 8 && ReductionBitWidth > 1) - ReductionBitWidth = 8; - - ReductionBitWidth = bit_ceil(ReductionBitWidth); - } - bool IsTopRoot = NodeIdx == 0; - while (NodeIdx < VectorizableTree.size() && - VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize && - VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) - ++NodeIdx; - while (NodeIdx < VectorizableTree.size()) { - ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; - unsigned Limit = 2; - unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode(); - if (IsTopRoot && - ReductionBitWidth == - DL->getTypeSizeInBits( - 
VectorizableTree.front()->Scalars.front()->getType())) - Limit = 3; - unsigned MaxBitWidth = ComputeMaxBitWidth( - TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, - IsProfitableToDemoteRoot, Opcode, Limit); - IsTopRoot = false; - IsProfitableToDemoteRoot = true; - - if (TruncNodes.empty()) { - NodeIdx = VectorizableTree.size(); - } else { - NodeIdx = *TruncNodes.begin() + 1; - TruncNodes.erase(TruncNodes.begin()); - } - - // If the maximum bit width we compute is less than the with of the roots' - // type, we can proceed with the narrowing. Otherwise, do nothing. - if (MaxBitWidth == 0 || - MaxBitWidth >= - cast(TreeRoot.front()->getType())->getBitWidth()) { - if (UserIgnoreList) - AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end()); + while (!Roots.empty()) { + DenseSet Visited; + collectValuesToDemote(Roots.pop_back_val(), ToDemote, DemotedConsts, Roots, + Visited); + } + + // Check that all users are marked for demotion. + DenseSet Demoted(ToDemote.begin(), ToDemote.end()); + DenseSet Visited; + for (Value *V: ToDemote) { + const TreeEntry *TE = getTreeEntry(V); + assert(TE && "Expected vectorized scalar."); + if (!Visited.insert(TE).second) continue; - } - - // Finally, map the values we can demote to the maximum bit with we - // computed. - for (Value *Scalar : ToDemote) { - TreeEntry *TE = getTreeEntry(Scalar); - assert(TE && "Expected vectorized scalar."); - if (MinBWs.contains(TE)) - continue; - bool IsSigned = any_of(TE->Scalars, [&](Value *R) { - return !isKnownNonNegative(R, SimplifyQuery(*DL)); - }); - MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); - const auto *I = cast(Scalar); - auto DCIt = DemotedConsts.find(I); - if (DCIt != DemotedConsts.end()) { - for (unsigned Idx : DCIt->getSecond()) { - // Check that all instructions operands are demoted. 
+ if (!all_of(TE->UserTreeIndices, [&](const EdgeInfo &EI) { + return all_of(EI.UserTE->Scalars, + [&](Value *V) { return Demoted.contains(V); }); + })) + return; + } + // Finally, map the values we can demote to the maximum bit with we computed. + for (auto *Scalar : ToDemote) { + auto *TE = getTreeEntry(Scalar); + assert(TE && "Expected vectorized scalar."); + if (MinBWs.contains(TE)) + continue; + bool IsSigned = any_of(TE->Scalars, [&](Value *R) { + KnownBits Known = computeKnownBits(R, *DL); + return !Known.isNonNegative(); + }); + MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); + const auto *I = cast(Scalar); + auto DCIt = DemotedConsts.find(I); + if (DCIt != DemotedConsts.end()) { + for (unsigned Idx : DCIt->getSecond()) { + // Check that all instructions operands are demoted. + if (all_of(TE->Scalars, [&](Value *V) { + auto SIt = DemotedConsts.find(cast(V)); + return SIt != DemotedConsts.end() && + is_contained(SIt->getSecond(), Idx); + })) { const TreeEntry *CTE = getOperandEntry(TE, Idx); - if (all_of(TE->Scalars, - [&](Value *V) { - auto SIt = DemotedConsts.find(cast(V)); - return SIt != DemotedConsts.end() && - is_contained(SIt->getSecond(), Idx); - }) || - all_of(CTE->Scalars, Constant::classof)) - MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); + MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); } } } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll index 5e3fd156666f5f..cef791633655a8 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -17,13 +17,12 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SUB0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> 
[[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4 ; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3 ; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll index 1cce52060c479f..47485e514ec2fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ;test_i16_extend NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s -; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s 
| FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll index a7a7f642ced538..d67fdc1cd6aa0e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll @@ -28,11 +28,21 @@ entry: define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64 -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64 +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64 +; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]] +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64 +; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] +; CHECK-NEXT: ret i64 [[ADD_3]] ; entry: %ld0 = load i8, ptr %ptr diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 500f10659f04cb..000e7a56df3778 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -802,10 +802,9 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64 -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; entry: %ld0 = load i8, ptr %ptr diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll index 05511f843a68fa..4565d4928ba4ad 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -15,12 +15,11 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; 
CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1 ; CHECK-NEXT: ret { i64, i64 } [[T17]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index 5ee80160765387..a0af8e36b36c79 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-3 < %s | FileCheck %s define void @t(i64 %v) { ; CHECK-LABEL: define void @t( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll index 6051638562b59b..6e512fcbb73924 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll @@ -6,17 +6,18 @@ define void @test(i8 %0) { ; CHECK-SAME: i8 [[TMP0:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> , i8 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32> +; 
CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 1 ; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SHR]] to i8 ; CHECK-NEXT: store i8 [[CONV9]], ptr null, align 1 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 4acd63078b82ef..2c834616becc0d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -6,20 +6,15 @@ define void @test(i64 %d.promoted.i) { ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I_1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 -; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) -; 
CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 -; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0 -; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> , i64 [[AND_1_I_1]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9 +; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0 +; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll index a316415dcc6b52..651631de2c35ad 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -17,15 +17,12 @@ target triple = "x86_64-unknown-linux-gnu" define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-LABEL: @PR31243_zext( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 -; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 -; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 -; SSE-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] +; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; SSE-NEXT: 
[[TMP2:%.*]] = zext i8 [[TMP0]] to i64 +; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] ; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1 ; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] @@ -76,15 +73,12 @@ entry: define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-LABEL: @PR31243_sext( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 -; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 -; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 -; SSE-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64 -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] +; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; SSE-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64 +; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; SSE-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64 +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] ; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1 ; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] @@ -95,12 +89,13 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 ; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; AVX-NEXT: 
[[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 -; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 -; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] -; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 -; AVX-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64 -; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] +; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0 +; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64 +; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]] +; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1 +; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64 +; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]] ; AVX-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1 ; AVX-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1 ; AVX-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll index 3cc32c1fc7b28e..88f75c37846efc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -15,8 +15,8 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -52,8 +52,8 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label 
[[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -89,8 +89,8 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -127,8 +127,8 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -165,8 +165,8 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> 
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: @@ -202,8 +202,8 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index b7237cbb02bb32..78c6d9516a3dec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -11,26 +11,26 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[SHUFFLE1]], ; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5 ; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6 ; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <16 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_4_I_I]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_5_I_I]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_6_I_I]], i32 7 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[TMP13]] to <16 x i8> -; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i8> [[TMP14]], -; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr undef, align 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] 
= trunc <16 x i32> [[TMP12]] to <16 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i8> [[TMP13]], +; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr undef, align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll index 1d1fcec2a7aeba..5d22b5a4873be3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll @@ -7,10 +7,12 @@ define i1 @test(i1 %cmp5.not.31) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> , i1 [[CMP5_NOT_31]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 0 -; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0 +; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: ret i1 [[CMP_NOT_I_I]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll index 2f6868d8dfd628..c1dd90d0e9a7bb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll @@ -8,18 +8,17 @@ ; YAML-NEXT: Function: stores ; YAML-NEXT: Args: ; 
YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-7' +; YAML-NEXT: - Cost: '-3' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) { ; CHECK-LABEL: @stores( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr [[OUT:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %load.1 = load i8, ptr %in, align 1 @@ -64,18 +63,17 @@ define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) { ; YAML-NEXT: Function: insertelems ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-9' +; YAML-NEXT: - Cost: '-5' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define <4 x i64> @insertelems(ptr noalias %in, ptr noalias %inn) { ; CHECK-LABEL: @insertelems( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64> 
+; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x i64> [[TMP5]] ; %load.1 = load i8, ptr %in, align 1 %gep.1 = getelementptr inbounds i8, ptr %in, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll b/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll index ff6f0bdd3db8f2..061fbdb45a13bc 100644 --- a/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll @@ -10,8 +10,8 @@ define i32 @alt_cmp(i16 %call46) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i16> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i16 [[TMP6]], 0 ; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[OP_RDX]] to i32 ; CHECK-NEXT: ret i32 [[EXT]] From ecf7db8b52d7061ef8f14c1f7b6fcc370072d087 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 7 Mar 2024 12:55:13 -0800 Subject: [PATCH 095/158] [lldb] Disable shell tests affected by ld_new bug (#84246) Equivalent to the changes made in https://github.com/llvm/llvm-project/pull/83941, except to support shell tests. 
--- .../test/Shell/Unwind/eh-frame-dwarf-unwind.test | 2 +- .../Unwind/thread-step-out-ret-addr-check.test | 2 +- lldb/test/Shell/lit.cfg.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test index 3df9906394f432..7b5d6650fe2f75 100644 --- a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test +++ b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test @@ -1,7 +1,7 @@ # Test handing of dwarf expressions specifying the location of registers, if # those expressions refer to the frame's CFA value. -# UNSUPPORTED: system-windows +# UNSUPPORTED: system-windows, ld_new-bug # REQUIRES: target-x86_64, native # RUN: %clang_host %p/Inputs/call-asm.c %p/Inputs/eh-frame-dwarf-unwind.s -o %t diff --git a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test index 682b0e5332b1c5..9bc7c78f79b26b 100644 --- a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test +++ b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test @@ -2,7 +2,7 @@ # points to non-executable memory. # REQUIRES: target-x86_64 -# UNSUPPORTED: system-windows +# UNSUPPORTED: system-windows, ld_new-bug # RUN: %clang_host %p/Inputs/call-asm.c -x assembler-with-cpp %p/Inputs/thread-step-out-ret-addr-check.s -o %t # RUN: not %lldb %t -s %s -b 2>&1 | FileCheck %s diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index d75c1f532e147f..31afe5151c0661 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -1,5 +1,6 @@ # -*- Python -*- +import json import os import platform import re @@ -179,3 +180,18 @@ def calculate_arch_features(arch_string): if "LD_PRELOAD" in os.environ: config.available_features.add("ld_preload-present") + +# Determine if a specific version of Xcode's linker contains a bug. We want to +# skip affected tests if they contain this bug. 
+if platform.system() == "Darwin": + try: + raw_version_details = subprocess.check_output( + ("xcrun", "ld", "-version_details") + ) + version_details = json.loads(raw_version_details) + version = version_details.get("version", "0") + version_tuple = tuple(int(x) for x in version.split(".")) + if (1000,) <= version_tuple <= (1109,): + config.available_features.add("ld_new-bug") + except: + pass From 641b98a0d1e20da9500aa012ced41e53967a423f Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 7 Mar 2024 12:56:05 -0800 Subject: [PATCH 096/158] [GlobalISel] Fix crash in tryFoldAndOrOrICmpsUsingRanges() with pointer types. --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 + .../GlobalISel/combine-logic-of-compare.mir | 63 +++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 2f18a64ca285bd..4862207d53f492 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6713,6 +6713,9 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, LLT CmpTy = MRI.getType(Cmp1->getReg(0)); LLT CmpOperandTy = MRI.getType(R1); + if (CmpOperandTy.isPointer()) + return false; + // We build ands, adds, and constants of type CmpOperandTy. // They must be legal to build. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) || diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir index d050823e3b9494..1eb445c03efcd6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir @@ -406,3 +406,66 @@ body: | %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>) $q0 = COPY %zext ... 
+--- +name: test_dont_combine_pointers +body: | + ; CHECK-LABEL: name: test_dont_combine_pointers + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -8 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 + ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[C]](p0) :: (load (p0)) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: G_BRCOND [[AND]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_BRCOND [[C3]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + %1:_(p0) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 -8 + %2:_(p0) = G_INTTOPTR %3(s64) + %6:_(s64) = G_CONSTANT i64 -16 + %5:_(p0) = G_INTTOPTR %6(s64) + %10:_(s1) = G_CONSTANT i1 false + + bb.2: + successors: %bb.4(0x60000000), %bb.3(0x20000000) + + %0:_(p0) = G_LOAD %1(p0) :: (load (p0)) + %4:_(s1) = G_ICMP intpred(eq), %0(p0), %2 + %7:_(s1) = G_ICMP intpred(eq), %0(p0), %5 + %8:_(s1) = G_OR %4, %7 + %9:_(s1) = G_SELECT %8(s1), %10, %10 + G_BRCOND %8(s1), %bb.4 + G_BR %bb.3 + + bb.4: + successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + + G_BRCOND %10(s1), 
%bb.2 + G_BR %bb.3 + + bb.3: + G_BR %bb.2 + +... From 143afb405a7e12e3fe1622b92f046ab2380c8981 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 7 Mar 2024 13:04:02 -0800 Subject: [PATCH 097/158] [BOLT] Add reading support for Linux kernel .altinstructions section (#84283) Read .altinstructions and annotate instructions that have alternative sequences with "AltInst" annotation. Note that some instructions may have more than one alternatives, in which case they will have multiple annotations in the form "AltInst", "AltInst2", "AltInst3", etc. --- bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 141 +++++++++++++++++++++++ bolt/test/X86/linux-alt-instruction.s | 92 +++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 bolt/test/X86/linux-alt-instruction.s diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 964a47346592fc..ecfbea3cb51185 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -27,6 +27,21 @@ using namespace bolt; namespace opts { +static cl::opt + AltInstHasPadLen("alt-inst-has-padlen", + cl::desc("specify that .altinstructions has padlen field"), + cl::init(false), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt + AltInstFeatureSize("alt-inst-feature-size", + cl::desc("size of feature field in .altinstructions"), + cl::init(2), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt + DumpAltInstructions("dump-alt-instructions", + cl::desc("dump Linux alternative instructions info"), + cl::init(false), cl::Hidden, cl::cat(BoltCategory)); + static cl::opt DumpExceptions("dump-linux-exceptions", cl::desc("dump Linux kernel exception table"), @@ -157,6 +172,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { /// Alignment of paravirtual patch structures. static constexpr size_t PARA_PATCH_ALIGN = 8; + /// .altinstructions section. 
+ ErrorOr AltInstrSection = std::errc::bad_address; + /// Section containing Linux bug table. ErrorOr BugTableSection = std::errc::bad_address; @@ -205,6 +223,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { Error readBugTable(); + /// Read alternative instruction info from .altinstructions. + Error readAltInstructions(); + /// Mark instructions referenced by kernel metadata. Error markInstructions(); @@ -232,6 +253,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { if (Error E = readBugTable()) return E; + if (Error E = readAltInstructions()) + return E; + return Error::success(); } @@ -1132,6 +1156,123 @@ Error LinuxKernelRewriter::readBugTable() { return Error::success(); } +/// The kernel can replace certain instruction sequences depending on hardware +/// it is running on and features specified during boot time. The information +/// about alternative instruction sequences is stored in .altinstructions +/// section. The format of entries in this section is defined in +/// arch/x86/include/asm/alternative.h: +/// +/// struct alt_instr { +/// s32 instr_offset; +/// s32 repl_offset; +/// uXX feature; +/// u8 instrlen; +/// u8 replacementlen; +/// u8 padlen; // present in older kernels +/// } __packed; +/// +/// Note the structures is packed. 
+Error LinuxKernelRewriter::readAltInstructions() { + AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); + if (!AltInstrSection) + return Error::success(); + + const uint64_t Address = AltInstrSection->getAddress(); + DataExtractor DE = DataExtractor(AltInstrSection->getContents(), + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + uint64_t EntryID = 0; + DataExtractor::Cursor Cursor(0); + while (Cursor && !DE.eof(Cursor)) { + const uint64_t OrgInstAddress = + Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); + const uint64_t AltInstAddress = + Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); + const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); + const uint8_t OrgSize = DE.getU8(Cursor); + const uint8_t AltSize = DE.getU8(Cursor); + + // Older kernels may have the padlen field. + const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; + + if (!Cursor) + return createStringError(errc::executable_format_error, + "out of bounds while reading .altinstructions"); + + ++EntryID; + + if (opts::DumpAltInstructions) { + BC.outs() << "Alternative instruction entry: " << EntryID + << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) + << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) + << "\n\tFeature: 0x" << Twine::utohexstr(Feature) + << "\n\tOrgSize: " << (int)OrgSize + << "\n\tAltSize: " << (int)AltSize << '\n'; + if (opts::AltInstHasPadLen) + BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; + } + + if (AltSize > OrgSize) + return createStringError(errc::executable_format_error, + "error reading .altinstructions"); + + BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); + if (!BF && opts::Verbosity) { + BC.outs() << "BOLT-INFO: no function matches address 0x" + << Twine::utohexstr(OrgInstAddress) + << " of instruction from .altinstructions\n"; + } + + BinaryFunction *AltBF = + BC.getBinaryFunctionContainingAddress(AltInstAddress); + if (AltBF && 
BC.shouldEmit(*AltBF)) { + BC.errs() + << "BOLT-WARNING: alternative instruction sequence found in function " + << *AltBF << '\n'; + AltBF->setIgnored(); + } + + if (!BF || !BC.shouldEmit(*BF)) + continue; + + if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) + return createStringError(errc::executable_format_error, + "error reading .altinstructions"); + + MCInst *Inst = + BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); + if (!Inst) + return createStringError(errc::executable_format_error, + "no instruction at address 0x%" PRIx64 + " referenced by .altinstructions entry %d", + OrgInstAddress, EntryID); + + // There could be more than one alternative instruction sequences for the + // same original instruction. Annotate each alternative separately. + std::string AnnotationName = "AltInst"; + unsigned N = 2; + while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) + AnnotationName = "AltInst" + std::to_string(N++); + + BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); + + // Annotate all instructions from the original sequence. Note that it's not + // the most efficient way to look for instructions in the address range, + // but since alternative instructions are uncommon, it will do for now. + for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { + Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - + BF->getAddress()); + if (Inst) + BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); + } + } + + BC.outs() << "BOLT-INFO: parsed " << EntryID + << " alternative instruction entries\n"; + + return Error::success(); +} + } // namespace std::unique_ptr diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s new file mode 100644 index 00000000000000..96e77545b654bc --- /dev/null +++ b/bolt/test/X86/linux-alt-instruction.s @@ -0,0 +1,92 @@ +# REQUIRES: system-linux + +## Check that BOLT correctly parses the Linux kernel .altinstructions section +## and annotates alternative instructions. 
+ +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops -o %t.out \ +# RUN: --alt-inst-feature-size=2 | FileCheck %s + +## Older kernels used to have padlen field in alt_instr. Check compatibility. + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \ +# RUN: %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops --alt-inst-has-padlen \ +# RUN: -o %t.out | FileCheck %s + +## Check with a larger size of "feature" field in alt_instr. + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops \ +# RUN: --alt-inst-feature-size=4 -o %t.out | FileCheck %s + +# CHECK: BOLT-INFO: Linux kernel binary detected +# CHECK: BOLT-INFO: parsed 2 alternative instruction entries + + .text + .globl _start + .type _start, %function +_start: +# CHECK: Binary Function "_start" +.L0: + rdtsc +# CHECK: rdtsc +# CHECK-SAME: AltInst: 1 +# CHECK-SAME: AltInst2: 2 + nop +# CHECK-NEXT: nop +# CHECK-SAME: AltInst: 1 +# CHECK-SAME: AltInst2: 2 + nop + nop +.L1: + ret + .size _start, .-_start + + .section .altinstr_replacement,"ax",@progbits +.A0: + lfence + rdtsc +.A1: + rdtscp +.Ae: + +## Alternative instruction info. + .section .altinstructions,"a",@progbits + + .long .L0 - . # org instruction + .long .A0 - . 
# alt instruction +.ifdef FEATURE_SIZE_4 + .long 0x72 # feature flags +.else + .word 0x72 # feature flags +.endif + .byte .L1 - .L0 # org size + .byte .A1 - .A0 # alt size +.ifdef PADLEN + .byte 0 +.endif + + .long .L0 - . # org instruction + .long .A1 - . # alt instruction +.ifdef FEATURE_SIZE_4 + .long 0x3b # feature flags +.else + .word 0x3b # feature flags +.endif + .byte .L1 - .L0 # org size + .byte .Ae - .A1 # alt size +.ifdef PADLEN + .byte 0 +.endif + +## Fake Linux Kernel sections. + .section __ksymtab,"a",@progbits + .section __ksymtab_gpl,"a",@progbits From 9cf9cb271bf86bda4996be9a31fa413381f2f5e3 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Thu, 7 Mar 2024 13:06:30 -0800 Subject: [PATCH 098/158] [clang] Upstream visionOS Availability & DarwinSDKInfo APIs (#84279) Admittedly a bit awkward, `visionos` is the correct and accepted spelling for annotating availability for xrOS target triples. This patch detects errors and handles cases when `xros` is mistakenly passed. In addition, add APIs for introduced/deprecated/obsoleted versioning in DarwinSDKInfo mappings. 
--- clang/include/clang/Basic/Attr.td | 8 ++++ clang/include/clang/Basic/DarwinSDKInfo.h | 24 ++++++++++++ clang/lib/Parse/ParseDecl.cpp | 5 ++- clang/lib/Parse/ParseExpr.cpp | 3 +- .../test/CodeGen/attr-availability-visionos.c | 10 +++++ clang/test/Sema/attr-availability-visionos.c | 39 +++++++++++++++++++ clang/unittests/Basic/DarwinSDKInfoTest.cpp | 10 +++++ 7 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/attr-availability-visionos.c create mode 100644 clang/test/Sema/attr-availability-visionos.c diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index fa191c7378dba4..ebb616fbe253fc 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -983,6 +983,8 @@ def Availability : InheritableAttr { .Case("watchos_app_extension", "watchOS (App Extension)") .Case("maccatalyst", "macCatalyst") .Case("maccatalyst_app_extension", "macCatalyst (App Extension)") + .Case("xros", "visionOS") + .Case("xros_app_extension", "visionOS (App Extension)") .Case("swift", "Swift") .Case("shadermodel", "HLSL ShaderModel") .Case("ohos", "OpenHarmony OS") @@ -1000,6 +1002,8 @@ static llvm::StringRef getPlatformNameSourceSpelling(llvm::StringRef Platform) { .Case("watchos_app_extension", "watchOSApplicationExtension") .Case("maccatalyst", "macCatalyst") .Case("maccatalyst_app_extension", "macCatalystApplicationExtension") + .Case("xros", "visionOS") + .Case("xros_app_extension", "visionOSApplicationExtension") .Case("zos", "z/OS") .Case("shadermodel", "ShaderModel") .Default(Platform); @@ -1016,6 +1020,10 @@ static llvm::StringRef canonicalizePlatformName(llvm::StringRef Platform) { .Case("watchOSApplicationExtension", "watchos_app_extension") .Case("macCatalyst", "maccatalyst") .Case("macCatalystApplicationExtension", "maccatalyst_app_extension") + .Case("visionOS", "xros") + .Case("visionOSApplicationExtension", "xros_app_extension") + .Case("visionos", "xros") + 
.Case("visionos_app_extension", "xros_app_extension") .Case("ShaderModel", "shadermodel") .Default(Platform); } }]; diff --git a/clang/include/clang/Basic/DarwinSDKInfo.h b/clang/include/clang/Basic/DarwinSDKInfo.h index dedfbd934a7b63..db20b968a898ea 100644 --- a/clang/include/clang/Basic/DarwinSDKInfo.h +++ b/clang/include/clang/Basic/DarwinSDKInfo.h @@ -105,6 +105,30 @@ class DarwinSDKInfo { map(const VersionTuple &Key, const VersionTuple &MinimumValue, std::optional MaximumValue) const; + /// Remap the 'introduced' availability version. + /// If None is returned, the 'unavailable' availability should be used + /// instead. + std::optional + mapIntroducedAvailabilityVersion(const VersionTuple &Key) const { + // API_TO_BE_DEPRECATED is 100000. + if (Key.getMajor() == 100000) + return VersionTuple(100000); + // Use None for maximum to force unavailable behavior for + return map(Key, MinimumValue, std::nullopt); + } + + /// Remap the 'deprecated' and 'obsoleted' availability version. + /// If None is returned for 'obsoleted', the 'unavailable' availability + /// should be used instead. If None is returned for 'deprecated', the + /// 'deprecated' version should be dropped. + std::optional + mapDeprecatedObsoletedAvailabilityVersion(const VersionTuple &Key) const { + // API_TO_BE_DEPRECATED is 100000. + if (Key.getMajor() == 100000) + return VersionTuple(100000); + return map(Key, MinimumValue, MaximumValue); + } + static std::optional parseJSON(const llvm::json::Object &Obj, VersionTuple MaximumDeploymentTarget); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 64b234eb460d24..dd179414a14191 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -1234,8 +1234,11 @@ void Parser::ParseAvailabilityAttribute( } IdentifierLoc *Platform = ParseIdentifierLoc(); if (const IdentifierInfo *const Ident = Platform->Ident) { + // Disallow xrOS for availability attributes. 
+ if (Ident->getName().contains("xrOS") || Ident->getName().contains("xros")) + Diag(Platform->Loc, diag::warn_availability_unknown_platform) << Ident; // Canonicalize platform name from "macosx" to "macos". - if (Ident->getName() == "macosx") + else if (Ident->getName() == "macosx") Platform->Ident = PP.getIdentifierInfo("macos"); // Canonicalize platform name from "macosx_app_extension" to // "macos_app_extension". diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 4bf954b5cc4db5..1f07eddb0fb378 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3863,7 +3863,8 @@ std::optional Parser::ParseAvailabilitySpec() { StringRef Platform = AvailabilityAttr::canonicalizePlatformName(GivenPlatform); - if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) { + if (AvailabilityAttr::getPrettyPlatformName(Platform).empty() || + (GivenPlatform.contains("xros") || GivenPlatform.contains("xrOS"))) { Diag(PlatformIdentifier->Loc, diag::err_avail_query_unrecognized_platform_name) << GivenPlatform; diff --git a/clang/test/CodeGen/attr-availability-visionos.c b/clang/test/CodeGen/attr-availability-visionos.c new file mode 100644 index 00000000000000..09b98fb4a7d5e3 --- /dev/null +++ b/clang/test/CodeGen/attr-availability-visionos.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple arm64-apple-xros1 -emit-llvm -o - %s 2>&1 | FileCheck %s + +__attribute__((availability(visionOS, introduced=1.1))) +void introduced_1_1(); + +void use() { + if (__builtin_available(visionOS 1.2, *)) + introduced_1_1(); + // CHECK: call i32 @__isPlatformVersionAtLeast(i32 11, i32 1, i32 2, i32 0) +} diff --git a/clang/test/Sema/attr-availability-visionos.c b/clang/test/Sema/attr-availability-visionos.c new file mode 100644 index 00000000000000..2c388c5d529073 --- /dev/null +++ b/clang/test/Sema/attr-availability-visionos.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple arm64-apple-xros1 -fapplication-extension -verify=visionos %s 2>&1 + 
+__attribute__((availability(xros, unavailable))) // visionos-warning {{unknown platform 'xros' in availability macro}} +void xros_unavail(); // visionos-note {{}} + +__attribute__((availability(xros_app_extension, unavailable))) // visionos-warning {{unknown platform 'xros_app_extension' in availability macro}} +void xros_ext_unavail(); // visionos-note {{}} + +__attribute__((availability(visionOSApplicationExtension, unavailable))) +void visionos_ext_unavail(); // visionos-note {{}} + +void use() { + xros_unavail(); // visionos-error {{'xros_unavail' is unavailable: not available on visionOS}} + xros_ext_unavail(); // visionos-error {{'xros_ext_unavail' is unavailable: not available on visionOS}} + visionos_ext_unavail(); // visionos-error {{'visionos_ext_unavail' is unavailable: not available on visionOS}} +} + +__attribute__((availability(visionOS, introduced=1.0))) +void visionos_introduced_1(); + +__attribute__((availability(visionos, introduced=1.1))) +void visionos_introduced_1_1(); // visionos-note 4 {{'visionos_introduced_1_1' has been marked as being introduced in visionOS 1.1 here, but the deployment target is visionOS 1}} + +void use2() { + if (__builtin_available(iOS 16.1, *)) + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(xrOS 1.1, *)) // visionos-error {{unrecognized platform name xrOS}} + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(xros_app_extension 1, *)) // visionos-error {{unrecognized platform name xros_app_extension}} + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(visionOS 1.1, *)) + visionos_introduced_1_1(); + + visionos_introduced_1(); + 
visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} +} diff --git a/clang/unittests/Basic/DarwinSDKInfoTest.cpp b/clang/unittests/Basic/DarwinSDKInfoTest.cpp index 5f24e6eae515d2..7214f3bc8e19f4 100644 --- a/clang/unittests/Basic/DarwinSDKInfoTest.cpp +++ b/clang/unittests/Basic/DarwinSDKInfoTest.cpp @@ -168,6 +168,16 @@ TEST(DarwinSDKInfoTest, ParseAndTestMappingIOSDerived) { EXPECT_EQ( *Mapping->map(VersionTuple(13, 0), VersionTuple(), VersionTuple(99, 99)), VersionTuple(99, 99)); + + // Verify introduced, deprecated, and obsoleted mappings. + EXPECT_EQ(Mapping->mapIntroducedAvailabilityVersion(VersionTuple(10, 1)), + VersionTuple(10.0)); + EXPECT_EQ(Mapping->mapDeprecatedObsoletedAvailabilityVersion( + VersionTuple(100000, 0)), + VersionTuple(100000)); + EXPECT_EQ( + Mapping->mapDeprecatedObsoletedAvailabilityVersion(VersionTuple(13.0)), + VersionTuple(15, 0, 99)); } TEST(DarwinSDKInfoTest, MissingKeys) { From ced1fac8a32e35b63733bda27c7f5b9a2b635403 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Thu, 7 Mar 2024 16:13:45 -0500 Subject: [PATCH 099/158] [mlir][sparse] Move n:m printing into toMLIRString (#84264) --- .../include/mlir/Dialect/SparseTensor/IR/Enums.h | 4 ++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 16 ++-------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 5563cb907e9353..33f613a46bad84 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -360,6 +360,10 @@ struct LevelType { std::string toMLIRString() const { std::string lvlStr = toFormatString(getLvlFmt()); std::string propStr = ""; + if (isa()) { + lvlStr += + "[" + std::to_string(getN()) + ", " + std::to_string(getM()) + "]"; + } if (isa()) propStr += toPropString(LevelPropNonDefault::Nonunique); diff 
--git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 6ba8b46370b038..c19907a945d3bb 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -646,28 +646,16 @@ void SparseTensorEncodingAttr::printDimensions( } } -std::string getNOutOfMString(LevelType lt) { - if (isNOutOfMLT(lt)) { - unsigned n = getN(lt); - unsigned m = getM(lt); - auto output = "[" + std::to_string(n) + ", " + std::to_string(m) + "]"; - return output; - } - return ""; -} - void SparseTensorEncodingAttr::printLevels(AffineMap &map, AsmPrinter &printer, ArrayRef lvlTypes) const { for (unsigned i = 0, n = map.getNumResults() - 1; i < n; i++) { map.getResult(i).print(printer.getStream()); - printer << " : " << toMLIRString(lvlTypes[i]) - << getNOutOfMString(lvlTypes[i]) << ", "; + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; } if (map.getNumResults() >= 1) { auto lastIndex = map.getNumResults() - 1; map.getResult(lastIndex).print(printer.getStream()); - printer << " : " << toMLIRString(lvlTypes[lastIndex]) - << getNOutOfMString(lvlTypes[lastIndex]); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); } } From 167b90d0401d0fe488195c7e3d6fc1edc8fc5d94 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 7 Mar 2024 21:40:23 +0000 Subject: [PATCH 100/158] [TBAA] Add test showing tbaa.struct being generated with relaxed-alias. Add test showing that tbaa.struct is generated when using TSan with relaxed-aliasing. 
--- ...tbaa-struct-relaxed-aliasing-with-tsan.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp diff --git a/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp b/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp new file mode 100644 index 00000000000000..931ff2476cd1bb --- /dev/null +++ b/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \ +// RUN: FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \ +// RUN: -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \ +// RUN: FileCheck %s +// +// Check that we do not create tbaa for instructions generated for copies. +// FIXME: !tbaa.struct is generated with null node as tag. + +// CHECK: !tbaa.struct +// CHECK-NOT: !tbaa + +struct A { + short s; + int i; + char c; + int j; +}; + +void copyStruct(A *a1, A *a2) { + *a1 = *a2; +} + +void copyInt(int *a, int *b) { + *a = *b; +} From a0c7714525b696d90d2021249f9105c24ca7adcc Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 7 Mar 2024 14:03:58 -0800 Subject: [PATCH 101/158] [RISCV] Split div vs rem scheduling information [nfc] (#84385) Allows a processor to define different latencies for the two operations. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoM.td | 8 ++++---- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 12 ++++++++++++ llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 13 +++++++++++++ llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 12 ++++++++++++ llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td | 6 +++++- llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 6 +++++- llvm/lib/Target/RISCV/RISCVSchedule.td | 8 ++++++-- 7 files changed, 57 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 6b43d4393f7670..8ea1560e5b372e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -41,9 +41,9 @@ def DIV : ALU_rr<0b0000001, 0b100, "div">, def DIVU : ALU_rr<0b0000001, 0b101, "divu">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; def REM : ALU_rr<0b0000001, 0b110, "rem">, - Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; + Sched<[WriteIRem, ReadIRem, ReadIRem]>; def REMU : ALU_rr<0b0000001, 0b111, "remu">, - Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; + Sched<[WriteIRem, ReadIRem, ReadIRem]>; } // Predicates = [HasStdExtM] let Predicates = [HasStdExtMOrZmmul, IsRV64], IsSignExtendingOpW = 1 in { @@ -57,9 +57,9 @@ def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">, Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; def REMW : ALUW_rr<0b0000001, 0b110, "remw">, - Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; + Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>; def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, - Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; + Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>; } // Predicates = [HasStdExtM, IsRV64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 60fa1a848306d8..e74c7aab7474da 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ 
b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -77,6 +77,16 @@ def : WriteRes { let ReleaseAtCycles = [33]; } +// Integer remainder +def : WriteRes { + let Latency = 34; + let ReleaseAtCycles = [34]; +} +def : WriteRes { + let Latency = 33; + let ReleaseAtCycles = [33]; +} + // Memory def : WriteRes; def : WriteRes; @@ -189,6 +199,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 0430d603620b6a..b21a56bdcdd20a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -189,6 +189,7 @@ class SiFive7AnyToGPRBypass WriteREV8, WriteORCB, WriteSFB, WriteIMul, WriteIMul32, WriteIDiv, WriteIDiv32, + WriteIRem, WriteIRem32, WriteLDB, WriteLDH, WriteLDW, WriteLDD]>; // SiFive7 machine model for scheduling and other instruction cost heuristics. @@ -273,6 +274,16 @@ def : WriteRes { let ReleaseAtCycles = [1, 33]; } +// Integer remainder +def : WriteRes { + let Latency = 66; + let ReleaseAtCycles = [1, 65]; +} +def : WriteRes { + let Latency = 34; + let ReleaseAtCycles = [1, 33]; +} + // Bitmanip let Latency = 3 in { // Rotates are in the late-B ALU. 
@@ -946,6 +957,8 @@ def : SiFive7AnyToGPRBypass; def : SiFive7AnyToGPRBypass; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 01398dea14a3b9..d02d34a0fb9c58 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -86,6 +86,16 @@ def : WriteRes { let ReleaseAtCycles = [1, 19]; } +// Integer remainder +def : WriteRes { + let Latency = 35; + let ReleaseAtCycles = [1, 34]; +} +def : WriteRes { + let Latency = 20; + let ReleaseAtCycles = [1, 19]; +} + let Latency = 1 in { // Bitmanip def : WriteRes; @@ -258,6 +268,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index f2c07810867bd2..9625d17e0b2600 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -54,10 +54,12 @@ def : WriteRes; def : WriteRes; def : WriteRes; -// Integer division: latency 33, inverse throughput 33 +// Integer division/remainder: latency 33, inverse throughput 33 let Latency = 33, ReleaseAtCycles = [33] in { def : WriteRes; def : WriteRes; +def : WriteRes; +def : WriteRes; } // Load/store instructions on SCR1 have latency 2 and inverse throughput 2 @@ -147,6 +149,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index 667b5983cb401c..ef491edf3671f8 100644 --- 
a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -64,11 +64,13 @@ def : WriteRes; def : WriteRes; } -// Integer division +// Integer division/remainder // SRT16 algorithm let Latency = 20, ReleaseAtCycles = [20] in { def : WriteRes; def : WriteRes; +def : WriteRes; +def : WriteRes; } // Zb* @@ -221,6 +223,8 @@ def : XS2LoadToALUBypass; def : XS2LoadToALUBypass; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 593921bfcc67ab..1d19624342d2bb 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -13,8 +13,10 @@ def WriteShiftImm : SchedWrite; // 32 or 64-bit shift by immediate operatio def WriteShiftImm32 : SchedWrite; // 32-bit shift by immediate operations on RV64Ix def WriteShiftReg : SchedWrite; // 32 or 64-bit shift by immediate operations def WriteShiftReg32 : SchedWrite; // 32-bit shift by immediate operations on RV64Ix -def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder -def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I +def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide +def WriteIDiv32 : SchedWrite; // 32-bit divide on RV64I +def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder +def WriteIRem32 : SchedWrite; // 32-bit remainder on RV64I def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I def WriteJmp : SchedWrite; // Jump @@ -135,6 +137,8 @@ def ReadShiftReg : SchedRead; def ReadShiftReg32 : SchedRead; // 32-bit shift by register operations on RV64Ix def ReadIDiv : SchedRead; def ReadIDiv32 : SchedRead; +def ReadIRem : SchedRead; +def ReadIRem32 : SchedRead; def ReadIMul : SchedRead; def ReadIMul32 : SchedRead; def ReadAtomicBA : SchedRead; From 
f78129e2bbafdd04a71bc09fc44e0797dd08db05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 7 Mar 2024 23:05:46 +0100 Subject: [PATCH 102/158] [Orc] Add NotifyCreated callback for LLJITBuilder (#84175) This is useful to attach generators to JITDylibs or inject initial symbol definitions. --- llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index 76d16e63df2815..d5682fcaa28b79 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -311,6 +311,8 @@ class LLJITBuilderState { using PlatformSetupFunction = unique_function(LLJIT &J)>; + using NotifyCreatedFunction = std::function; + std::unique_ptr EPC; std::unique_ptr ES; std::optional JTMB; @@ -321,6 +323,7 @@ class LLJITBuilderState { CompileFunctionCreator CreateCompileFunction; unique_function PrePlatformSetup; PlatformSetupFunction SetUpPlatform; + NotifyCreatedFunction NotifyCreated; unsigned NumCompileThreads = 0; /// Called prior to JIT class construcion to fix up defaults. @@ -441,6 +444,16 @@ class LLJITBuilderSetters { return impl(); } + /// Set up a callback after successful construction of the JIT. + /// + /// This is useful to attach generators to JITDylibs or inject initial symbol + /// definitions. + SetterImpl & + setNotifyCreatedCallback(LLJITBuilderState::NotifyCreatedFunction Callback) { + impl().NotifyCreated = std::move(Callback); + return impl(); + } + /// Set the number of compile threads to use. 
/// /// If set to zero, compilation will be performed on the execution thread when @@ -474,6 +487,11 @@ class LLJITBuilderSetters { std::unique_ptr J(new JITType(impl(), Err)); if (Err) return std::move(Err); + + if (impl().NotifyCreated) + if (Error Err = impl().NotifyCreated(*J)) + return Err; + return std::move(J); } From 2a4a852a67eab2f8d0533c23719b1bd08d6edea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Wed, 6 Mar 2024 16:46:56 +0100 Subject: [PATCH 103/158] Reland [clang-repl] Expose setter for triple in IncrementalCompilerBuilder (#84174) With out-of-process execution the target triple can be different from the one on the host. We need an interface to configure it. Relanding this with cleanup-fixes in the unittest. --- clang/include/clang/Interpreter/Interpreter.h | 5 +- clang/lib/Interpreter/Interpreter.cpp | 12 +++-- clang/unittests/Interpreter/CMakeLists.txt | 1 + .../IncrementalCompilerBuilderTest.cpp | 47 +++++++++++++++++++ 4 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 292fa566ae7037..c8f932e95c4798 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -48,6 +48,8 @@ class IncrementalCompilerBuilder { UserArgs = Args; } + void SetTargetTriple(std::string TT) { TargetTriple = TT; } + // General C++ llvm::Expected> CreateCpp(); @@ -62,11 +64,12 @@ class IncrementalCompilerBuilder { private: static llvm::Expected> - create(std::vector &ClangArgv); + create(std::string TT, std::vector &ClangArgv); llvm::Expected> createCuda(bool device); std::vector UserArgs; + std::optional TargetTriple; llvm::StringRef OffloadArch; llvm::StringRef CudaSDKPath; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 9f97a3c6b0be9e..37696b28976428 
100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -132,7 +132,8 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { } // anonymous namespace llvm::Expected> -IncrementalCompilerBuilder::create(std::vector &ClangArgv) { +IncrementalCompilerBuilder::create(std::string TT, + std::vector &ClangArgv) { // If we don't know ClangArgv0 or the address of main() at this point, try // to guess it anyway (it's possible on some platforms). @@ -162,8 +163,7 @@ IncrementalCompilerBuilder::create(std::vector &ClangArgv) { TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], - llvm::sys::getProcessTriple(), Diags); + driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], TT, Diags); Driver.setCheckInputsExist(false); // the input comes from mem buffers llvm::ArrayRef RF = llvm::ArrayRef(ClangArgv); std::unique_ptr Compilation(Driver.BuildCompilation(RF)); @@ -185,7 +185,8 @@ IncrementalCompilerBuilder::CreateCpp() { Argv.push_back("-xc++"); Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); - return IncrementalCompilerBuilder::create(Argv); + std::string TT = TargetTriple ? *TargetTriple : llvm::sys::getProcessTriple(); + return IncrementalCompilerBuilder::create(TT, Argv); } llvm::Expected> @@ -213,7 +214,8 @@ IncrementalCompilerBuilder::createCuda(bool device) { Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); - return IncrementalCompilerBuilder::create(Argv); + std::string TT = TargetTriple ? 
*TargetTriple : llvm::sys::getProcessTriple(); + return IncrementalCompilerBuilder::create(TT, Argv); } llvm::Expected> diff --git a/clang/unittests/Interpreter/CMakeLists.txt b/clang/unittests/Interpreter/CMakeLists.txt index 712641afb976dd..0ddedb283e07d1 100644 --- a/clang/unittests/Interpreter/CMakeLists.txt +++ b/clang/unittests/Interpreter/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(ClangReplInterpreterTests + IncrementalCompilerBuilderTest.cpp IncrementalProcessingTest.cpp InterpreterTest.cpp CodeCompletionTest.cpp diff --git a/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp b/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp new file mode 100644 index 00000000000000..f729566f7efde6 --- /dev/null +++ b/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp @@ -0,0 +1,47 @@ +//=== unittests/Interpreter/IncrementalCompilerBuilderTest.cpp ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TargetOptions.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Interpreter/Interpreter.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "llvm/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +// Usually FrontendAction takes the raw pointers and wraps them back into +// unique_ptrs in InitializeFileRemapping() +static void cleanupRemappedFileBuffers(CompilerInstance &CI) { + for (const auto &RB : CI.getPreprocessorOpts().RemappedFileBuffers) { + delete RB.second; + } + CI.getPreprocessorOpts().clearRemappedFiles(); +} + +TEST(IncrementalCompilerBuilder, SetCompilerArgs) { + std::vector ClangArgv = {"-Xclang", "-ast-dump-all"}; + auto CB = clang::IncrementalCompilerBuilder(); + CB.SetCompilerArgs(ClangArgv); + auto CI = cantFail(CB.CreateCpp()); + EXPECT_TRUE(CI->getFrontendOpts().ASTDumpAll); + cleanupRemappedFileBuffers(*CI); +} + +TEST(IncrementalCompilerBuilder, SetTargetTriple) { + auto CB = clang::IncrementalCompilerBuilder(); + CB.SetTargetTriple("armv6-none-eabi"); + auto CI = cantFail(CB.CreateCpp()); + EXPECT_EQ(CI->getTargetOpts().Triple, "armv6-none-unknown-eabi"); + cleanupRemappedFileBuffers(*CI); +} + +} // end anonymous namespace From 23d2c388303982e4341f248120915328a6444b51 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 7 Mar 2024 22:18:39 +0000 Subject: [PATCH 104/158] [gn build] Port 2a4a852a67ea --- llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn index 441d57187cd2db..a20066436a3bf1 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn +++ 
b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn @@ -12,6 +12,7 @@ unittest("ClangReplInterpreterTests") { ] sources = [ "CodeCompletionTest.cpp", + "IncrementalCompilerBuilderTest.cpp", "IncrementalProcessingTest.cpp", "InterpreterTest.cpp", ] From 49b1fc4f831a047bd6ffde9ba19612c329dc5166 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 7 Mar 2024 14:37:10 -0800 Subject: [PATCH 105/158] [CVP] Freeze Y when expanding urem x, y with X < 2Y (#84390) We're going from a single use to two independent uses, we need these two to see consistent values for undef. As an example, consider x = 0x2 when y = 0b00u1. If the sub use picks 0b0001 and the cmp use picks 0b0011, that would be incorrect. --- .../Scalar/CorrelatedValuePropagation.cpp | 9 ++++++--- .../urem-expansion.ll | 20 +++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 7a2011888ab008..de3bfb57b538d3 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -805,9 +805,12 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR, Value *FrozenX = X; if (!isGuaranteedNotToBeUndef(X)) FrozenX = B.CreateFreeze(X, X->getName() + ".frozen"); - auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem"); - auto *Cmp = - B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp"); + Value *FrozenY = Y; + if (!isGuaranteedNotToBeUndef(Y)) + FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen"); + auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem"); + auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY, + Instr->getName() + ".cmp"); ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX); } else { auto *Cmp = diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll 
b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll index 2af8c8f23bbd3e..8e276d010fdd1b 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll @@ -198,8 +198,9 @@ define i8 @variable.v4(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -217,8 +218,9 @@ define i8 @variable.v4.range(ptr %x.ptr, ptr %y.ptr) { ; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG0]] ; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[Y_PTR:%.*]], align 1, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -236,8 +238,9 @@ define i8 @variable.v5(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] 
+; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -259,8 +262,9 @@ define i8 @variable.v6(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; From 48673825f47cbac9cd7c61299ca8d01579314ae0 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 7 Mar 2024 14:28:20 -0800 Subject: [PATCH 106/158] [ORC] Deallocate FinalizedAllocs on error paths in notifyEmitted. If notifyEmitted encounters a failure (either because some plugin returned one, or because the ResourceTracker was defunct) then we need to deallocate the FinalizedAlloc manually. No testcase yet: This requires a concurrent setup -- we'll need to build some infrastructure to coordinate links and deliberately injected failures in order to reliably test this. 
--- llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 6ac256dff9b436..131728fd7e7e4c 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -720,14 +720,22 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR, for (auto &P : Plugins) Err = joinErrors(std::move(Err), P->notifyEmitted(MR)); - if (Err) + if (Err) { + if (FA) + Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA))); return Err; + } if (!FA) return Error::success(); - return MR.withResourceKeyDo( + Err = MR.withResourceKeyDo( [&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); }); + + if (Err) + Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA))); + + return Err; } Error ObjectLinkingLayer::handleRemoveResources(JITDylib &JD, ResourceKey K) { From 69b8bc71110aca64c74a14800e800f4b151d5d6f Mon Sep 17 00:00:00 2001 From: dyung Date: Thu, 7 Mar 2024 14:43:12 -0800 Subject: [PATCH 107/158] [Dexter] Extend XFAIL of Dexter tests to all MacOS architectures. (#83936) I am trying to bring up a MacOS buildbot targeting x86 and noticed that two Dexter tests were failing, cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp and cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp. Looking in the history for these tests, they were XFAILed for Apple Silicon in 9c46606 and are failing similar on x86 for me, so we should extend the XFAIL to all MacOS architectures. 
--- .../debuginfo-tests/llgdb-tests/static-member-2.cpp | 2 +- .../debuginfo-tests/llgdb-tests/static-member.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp index 79ff74cb2d0aeb..3f11ae018fc858 100644 --- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp +++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out // RUN: %test_debuginfo %s %t.out // XFAIL: gdb-clang-incompatibility -// XFAIL: system-darwin && target-aarch64 +// XFAIL: system-darwin // DEBUGGER: delete breakpoints // DEBUGGER: break static-member.cpp:33 diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp index abfa8e3337f64d..57316dfd640404 100644 --- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp +++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out // RUN: %test_debuginfo %s %t.out // XFAIL: !system-darwin && gdb-clang-incompatibility -// XFAIL: system-darwin && target-aarch64 +// XFAIL: system-darwin // DEBUGGER: delete breakpoints // DEBUGGER: break static-member.cpp:33 // DEBUGGER: r From 3a56b5a27d711aaa141c354706638bd94f7460a3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 7 Mar 2024 16:53:41 -0600 Subject: [PATCH 108/158] [CUDA] Include PTX in non-RDC mode using the new driver (#84367) Summary: The old driver embed PTX in rdc-mode and so does the `nvcc` compiler. The new drivers currently does not do this, so we should keep it consistent in this case. This simply requires adding the assembler output as an input to the offloading action that gets fed to fatbin. 
--- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Driver/Driver.cpp | 8 ++++++++ clang/lib/Driver/ToolChains/Cuda.cpp | 22 ++++++++++++---------- clang/test/Driver/cuda-phases.cu | 25 +++++++++++++------------ 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1b901a27fd19d1..42c4a7c4d4bd14 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -391,6 +391,9 @@ RISC-V Support CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ +- PTX is no longer included by default when compiling for CUDA. Using + ``--cuda-include-ptx=all`` will return the old behavior. + CUDA Support ^^^^^^^^^^^^ diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cecd34acbc92c0..96e6ad77f5e50d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + + // Compiling CUDA in non-RDC mode uses the PTX output if available. 
+ for (Action *Input : A->getInputs()) + if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) + DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadActions.push_back(C.MakeAction(DDep, A->getType())); + ++TCAndArch; } } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 177fd6310e7ee2..c6007d3cfab864 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -503,18 +503,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } -static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { - bool includePTX = true; - for (Arg *A : Args) { - if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || - A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) - continue; +static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { + // The new driver does not include PTX by default to avoid overhead. 
+ bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false); + for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, + options::OPT_no_cuda_include_ptx_EQ)) { A->claim(); const StringRef ArchStr = A->getValue(); - if (ArchStr == "all" || ArchStr == gpu_arch) { - includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); - continue; - } + if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = true; + else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = false; } return includePTX; } diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 9a231091de2bdc..a1c3c9b51b1e41 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -244,31 +244,32 @@ // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) -// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \ +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s -// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda -// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output -// NEW-DRIVER-NEXT: 2: compiler, {1}, ir -// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52) +// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) +// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) +// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda) +// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // 
NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object +// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object -// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image -// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object +// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) +// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda) // NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s + // NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) // NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) // NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, 
(host-cuda) @@ -277,13 +278,13 @@ // NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object +// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object // NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object // NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) // NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda) From 14171b87a3b5a403f39d78da964595175636a0ae Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Thu, 7 Mar 2024 17:58:28 -0500 Subject: [PATCH 109/158] [libc][stdfix] Add exp function for short _Accum and _Accum types. 
(#84391) --- libc/config/baremetal/arm/entrypoints.txt | 2 + libc/config/baremetal/riscv/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 2 + libc/docs/math/stdfix.rst | 2 +- libc/spec/llvm_libc_stdfix_ext.td | 3 + libc/src/__support/fixed_point/fx_rep.h | 24 ++--- libc/src/stdfix/CMakeLists.txt | 26 +++++ libc/src/stdfix/exphk.cpp | 92 +++++++++++++++++ libc/src/stdfix/exphk.h | 20 ++++ libc/src/stdfix/expk.cpp | 104 ++++++++++++++++++++ libc/src/stdfix/expk.h | 20 ++++ libc/test/src/stdfix/CMakeLists.txt | 36 +++++++ libc/test/src/stdfix/ExpTest.h | 77 +++++++++++++++ libc/test/src/stdfix/exphk_test.cpp | 13 +++ libc/test/src/stdfix/expk_test.cpp | 13 +++ 15 files changed, 423 insertions(+), 13 deletions(-) create mode 100644 libc/src/stdfix/exphk.cpp create mode 100644 libc/src/stdfix/exphk.h create mode 100644 libc/src/stdfix/expk.cpp create mode 100644 libc/src/stdfix/expk.h create mode 100644 libc/test/src/stdfix/ExpTest.h create mode 100644 libc/test/src/stdfix/exphk_test.cpp create mode 100644 libc/test/src/stdfix/expk_test.cpp diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 99796ad5edf5d5..6e4fdb03626436 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 99796ad5edf5d5..6e4fdb03626436 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk 
libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 1f36f127e3c473..0b77a9e170aae1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -483,6 +483,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/docs/math/stdfix.rst b/libc/docs/math/stdfix.rst index 5e39d5c01d1e53..d8dcb0cfa4c521 100644 --- a/libc/docs/math/stdfix.rst +++ b/libc/docs/math/stdfix.rst @@ -110,7 +110,7 @@ floating point types, but are not part of the ISO/IEC TR 18037:2008 spec. +===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+ | cos | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ -| exp | | | | | | | | | | | | | +| exp | | | | | | | | |check| | | |check| | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ | log | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ diff --git a/libc/spec/llvm_libc_stdfix_ext.td b/libc/spec/llvm_libc_stdfix_ext.td index 75bde47810a6be..7bc7ec5464081b 100644 --- a/libc/spec/llvm_libc_stdfix_ext.td +++ b/libc/spec/llvm_libc_stdfix_ext.td @@ -5,6 +5,9 @@ def LLVMLibcStdfixExt : 
StandardSpec<"llvm_libc_stdfix_ext"> { [], // types [], // enums [ // functions + GuardedFunctionSpec<"exphk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"expk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h index 042cd2b20714c6..f13640a6c01918 100644 --- a/libc/src/__support/fixed_point/fx_rep.h +++ b/libc/src/__support/fixed_point/fx_rep.h @@ -45,7 +45,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return SFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return SFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return SFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; } LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HR; } @@ -65,7 +65,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return USFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return USFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; } LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHR; } @@ -85,7 +85,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return FRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return FRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return FRACT_MAX; } LIBC_INLINE static constexpr Type 
ZERO() { return 0.0R; } LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5R; } @@ -105,7 +105,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return UFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return UFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; } LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UR; } @@ -125,7 +125,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return LFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return LFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; } LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LR; } @@ -145,7 +145,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return ULFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; } LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULR; } @@ -165,7 +165,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return SACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return SACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return SACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; } LIBC_INLINE static constexpr Type EPS() { return SACCUM_EPSILON; } LIBC_INLINE static constexpr 
Type ONE_HALF() { return 0.5HK; } @@ -185,7 +185,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return USACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return USACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; } LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHK; } @@ -205,7 +205,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return ACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0K; } LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5K; } @@ -225,7 +225,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return UACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return UACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; } LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UK; } @@ -245,7 +245,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return LACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return LACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; } LIBC_INLINE static constexpr Type EPS() { return LACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LK; } @@ -265,7 +265,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; 
LIBC_INLINE static constexpr Type MIN() { return ULACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; } LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULK; } diff --git a/libc/src/stdfix/CMakeLists.txt b/libc/src/stdfix/CMakeLists.txt index 3a1cb66b7abcaf..10d76ae31349f9 100644 --- a/libc/src/stdfix/CMakeLists.txt +++ b/libc/src/stdfix/CMakeLists.txt @@ -67,3 +67,29 @@ add_entrypoint_object( DEPENDS libc.src.__support.fixed_point.sqrt ) + +add_entrypoint_object( + exphk + HDRS + exphk.h + SRCS + exphk.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.fx_rep + libc.src.__support.CPP.bit +) + +add_entrypoint_object( + expk + HDRS + expk.h + SRCS + expk.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.fx_rep + libc.src.__support.CPP.bit +) diff --git a/libc/src/stdfix/exphk.cpp b/libc/src/stdfix/exphk.cpp new file mode 100644 index 00000000000000..19a972b390c71b --- /dev/null +++ b/libc/src/stdfix/exphk.cpp @@ -0,0 +1,92 @@ +//===-- Implementation of exphk function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exphk.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/fx_bits.h" + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up tables for exp(hi) and exp(mid). 
+// Generated with Sollya: +// > for i from 0 to 89 do { +// hi = floor(i/8) - 5; +// m = i/8 - floor(i/8) - 0.5; +// e_hi = nearestint(exp(hi) * 2^7) * 2^-7; +// e_mid = nearestint(exp(m) * 2^7) * 2^-7; +// print(hi, e_hi, m, e_mid); +// }; +// Notice that when i = 88 and 89, e_hi will overflow short accum range. +static constexpr short accum EXP_HI[12] = { + 0x1.0p-7hk, 0x1.0p-6hk, 0x1.8p-5hk, 0x1.1p-3hk, 0x1.78p-2hk, 0x1.0p0hk, + 0x1.5cp1hk, 0x1.d9p2hk, 0x1.416p4hk, 0x1.b4dp5hk, 0x1.28d4p7hk, SACCUM_MAX, +}; + +static constexpr short accum EXP_MID[8] = { + 0x1.38p-1hk, 0x1.6p-1hk, 0x1.9p-1hk, 0x1.c4p-1hk, + 0x1.0p0hk, 0x1.22p0hk, 0x1.48p0hk, 0x1.74p0hk, +}; + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(short accum, exphk, (short accum x)) { + using FXRep = fixed_point::FXRep; + using StorageType = typename FXRep::StorageType; + // Output overflow + if (LIBC_UNLIKELY(x >= 0x1.64p2hk)) + return FXRep::MAX(); + // Lower bound where exp(x) -> 0: + // floor(log(2^-8) * 2^7) * 2^-7 + if (LIBC_UNLIKELY(x <= -0x1.63p2hk)) + return FXRep::ZERO(); + + // Current range of x: + // -0x1.628p2 <= x <= 0x1.638p2 + // Range reduction: + // x = hi + mid + lo, + // where: + // hi is an integer + // mid * 2^3 is an integer + // |lo| <= 2^-4. + // Then exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) + // ~ exp(hi) * exp(mid) * (1 + lo) + // with relative errors < |lo|^2 <= 2^-8. + // exp(hi) and exp(mid) are extracted from small lookup tables. + + // Round-to-nearest 1/8, tie-to-(+Int): + constexpr short accum ONE_SIXTEENTH = 0x1.0p-4hk; + // x_rounded = floor(x + 1/16). + short accum x_rounded = ((x + ONE_SIXTEENTH) >> (FXRep::FRACTION_LEN - 3)) + << (FXRep::FRACTION_LEN - 3); + short accum lo = x - x_rounded; + + // Range of x_rounded: + // x_rounded >= floor((-0x1.628p2 + 0x1.0p-4) * 2^3) * 2^-3 + // = -0x1.6p2 = -5.5 + // To get the indices, we shift the values so that it start with 0. 
+ // Range of indices: 0 <= indices <= 89 + StorageType indices = cpp::bit_cast((x_rounded + 0x1.6p2hk) >> + (FXRep::FRACTION_LEN - 3)); + // So we have the following relation: + // indices = (hi + mid + 44/8) * 8 + // That implies: + // hi + mid = indices/8 - 5.5 + // So for lookup tables, we can use the upper 4 bits to get: + // exp( floor(indices / 8) - 5 ) + // and lower 3 bits for: + // exp( (indices - floor(indices)) - 0.5 ) + short accum exp_hi = EXP_HI[indices >> 3]; + short accum exp_mid = EXP_MID[indices & 0x7]; + // exp(x) ~ exp(hi) * exp(mid) * (1 + lo); + return (exp_hi * (exp_mid * (0x1.0p0hk + lo))); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/exphk.h b/libc/src/stdfix/exphk.h new file mode 100644 index 00000000000000..da03bb76d53f53 --- /dev/null +++ b/libc/src/stdfix/exphk.h @@ -0,0 +1,20 @@ +//===-- Implementation header for exphk -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_EXPHK_H +#define LLVM_LIBC_SRC_STDFIX_EXPHK_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +short accum exphk(short accum x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_EXPHK_H diff --git a/libc/src/stdfix/expk.cpp b/libc/src/stdfix/expk.cpp new file mode 100644 index 00000000000000..57227fd27769cc --- /dev/null +++ b/libc/src/stdfix/expk.cpp @@ -0,0 +1,104 @@ +//===-- Implementation of expk function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "expk.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/fx_bits.h" + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up tables for exp(hi) and exp(mid). +// Generated with Sollya: +// > for i from 0 to 23 do { +// hi = i - 11; +// e_hi = nearestint(exp(hi) * 2^15) * 2^-15; +// print(e_hi, "k,"); +// }; +static constexpr accum EXP_HI[24] = { + 0x1p-15k, 0x1p-15k, 0x1p-13k, 0x1.6p-12k, + 0x1.ep-11k, 0x1.44p-9k, 0x1.bap-8k, 0x1.2cp-6k, + 0x1.97cp-5k, 0x1.153p-3k, 0x1.78b8p-2k, 0x1p0k, + 0x1.5bf1p1k, 0x1.d8e68p2k, 0x1.415e6p4k, 0x1.b4c9p5k, + 0x1.28d388p7k, 0x1.936dc6p8k, 0x1.1228858p10k, 0x1.749ea7cp11k, + 0x1.fa7157cp12k, 0x1.5829dcf8p14k, 0x1.d3c4489p15k, ACCUM_MAX, +}; + +// Generated with Sollya: +// > for i from 0 to 15 do { +// m = i/16 - 0.0625; +// e_m = nearestint(exp(m) * 2^15) * 2^-15; +// print(e_m, "k,"); +// }; +static constexpr accum EXP_MID[16] = { + 0x1.e0fcp-1k, 0x1p0k, 0x1.1082p0k, 0x1.2216p0k, + 0x1.34ccp0k, 0x1.48b6p0k, 0x1.5deap0k, 0x1.747ap0k, + 0x1.8c8p0k, 0x1.a612p0k, 0x1.c14cp0k, 0x1.de46p0k, + 0x1.fd1ep0k, 0x1.0efap1k, 0x1.2074p1k, 0x1.330ep1k, +}; + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(accum, expk, (accum x)) { + using FXRep = fixed_point::FXRep; + using StorageType = typename FXRep::StorageType; + // Output overflow + // > floor(log(2^16) * 2^15) * 2^-15 + if (LIBC_UNLIKELY(x >= 0x1.62e4p3k)) + return FXRep::MAX(); + // Lower bound where exp(x) -> 0: + // floor(log(2^-16) * 2^15) * 2^-15 + if (LIBC_UNLIKELY(x <= -0x1.62e44p3k)) + return FXRep::ZERO(); + + // Current range of x: + // -0x1.62e4p3 <= x <= 0x1.62e3cp3 + // Range reduction: + // x = hi + mid + lo, + // where: + // hi is an integer + // mid * 2^4 is an integer + // |lo| <= 2^-5. 
+ // Then exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) + // ~ exp(hi) * exp(mid) * (1 + lo + lo^2 / 2) + // with relative errors < |lo|^3/2 <= 2^-16. + // exp(hi) and exp(mid) are extracted from small lookup tables. + + // Round-to-nearest 1/16, tie-to-(+Int): + constexpr accum ONE_THIRTY_SECOND = 0x1.0p-5k; + // x_rounded = floor(x + 1/16). + accum x_rounded = ((x + ONE_THIRTY_SECOND) >> (FXRep::FRACTION_LEN - 4)) + << (FXRep::FRACTION_LEN - 4); + accum lo = x - x_rounded; + + // Range of x_rounded: + // x_rounded >= floor((-0x1.62e4p3 + 0x1.0p-5) * 2^4) * 2^-4 + // = -0x1.62p3 = -11.0625 + // To get the indices, we shift the values so that it start with 0. + // Range of indices: 0 <= indices <= 355. + StorageType indices = cpp::bit_cast((x_rounded + 0x1.62p3k) >> + (FXRep::FRACTION_LEN - 4)); + // So we have the following relation: + // indices = (hi + mid + 177/16) * 16 + // That implies: + // hi + mid = indices/16 - 11.0625 + // So for lookup tables, we can use the upper 4 bits to get: + // exp( floor(indices / 16) - 11 ) + // and lower 4 bits for: + // exp( (indices - floor(indices)) - 0.0625 ) + accum exp_hi = EXP_HI[indices >> 4]; + accum exp_mid = EXP_MID[indices & 0xf]; + // exp(x) ~ exp(hi) * exp(mid) * (1 + lo); + accum l1 = 0x1.0p0k + (lo >> 1); // = 1 + lo / 2 + accum l2 = 0x1.0p0k + lo * l1; // = 1 + lo * (1 + lo / 2) = 1 + lo + lo^2/2 + return (exp_hi * (exp_mid * l2)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/expk.h b/libc/src/stdfix/expk.h new file mode 100644 index 00000000000000..4526686a200b47 --- /dev/null +++ b/libc/src/stdfix/expk.h @@ -0,0 +1,20 @@ +//===-- Implementation header for expk --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_EXPK_H +#define LLVM_LIBC_SRC_STDFIX_EXPK_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +accum expk(accum x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_EXPK_H diff --git a/libc/test/src/stdfix/CMakeLists.txt b/libc/test/src/stdfix/CMakeLists.txt index d3e122884eb40e..74a1fb13127cc3 100644 --- a/libc/test/src/stdfix/CMakeLists.txt +++ b/libc/test/src/stdfix/CMakeLists.txt @@ -96,3 +96,39 @@ add_libc_test( libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.sqrt ) + +add_libc_test( + exphk_test + SUITE + libc-stdfix-tests + HDRS + ExpTest.h + SRCS + exphk_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.exphk + libc.src.math.exp + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.FPUtil.basic_operations +) + +add_libc_test( + expk_test + SUITE + libc-stdfix-tests + HDRS + ExpTest.h + SRCS + expk_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.expk + libc.src.math.exp + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.FPUtil.basic_operations +) diff --git a/libc/test/src/stdfix/ExpTest.h b/libc/test/src/stdfix/ExpTest.h new file mode 100644 index 00000000000000..e588cebf621b90 --- /dev/null +++ b/libc/test/src/stdfix/ExpTest.h @@ -0,0 +1,77 @@ +//===-- Utility class to test integer sqrt ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/fixed_point/fx_rep.h" +#include "src/__support/fixed_point/sqrt.h" + +#include "src/math/exp.h" + +template class ExpTest : public LIBC_NAMESPACE::testing::Test { + + using FXRep = LIBC_NAMESPACE::fixed_point::FXRep; + static constexpr T zero = FXRep::ZERO(); + static constexpr T one = static_cast(1); + static constexpr T eps = FXRep::EPS(); + +public: + typedef T (*ExpFunc)(T); + + void test_special_numbers(ExpFunc func) { + EXPECT_EQ(one, func(T(0))); + EXPECT_EQ(FXRep::MAX(), func(T(30))); + EXPECT_EQ(zero, func(T(-30))); + } + + void test_range_with_step(ExpFunc func, T step, bool rel_error) { + constexpr int COUNT = 255; + constexpr double ERR = 3.0 * static_cast(eps); + double x_d = 0.0; + T x = step; + for (int i = 0; i < COUNT; ++i) { + x += step; + x_d = static_cast(x); + double y_d = static_cast(func(x)); + double result = LIBC_NAMESPACE::exp(x_d); + double errors = rel_error + ? LIBC_NAMESPACE::fputil::abs((y_d / result) - 1.0) + : LIBC_NAMESPACE::fputil::abs(y_d - result); + if (errors > ERR) { + // Print out the failure input and output. 
+ EXPECT_EQ(x, T(0)); + EXPECT_EQ(func(x), zero); + } + ASSERT_TRUE(errors <= ERR); + } + } + + void test_positive_range(ExpFunc func) { + test_range_with_step(func, T(0x1.0p-6), /*rel_error*/ true); + } + + void test_negative_range(ExpFunc func) { + test_range_with_step(func, T(-0x1.0p-6), /*rel_error*/ false); + } +}; + +#define LIST_EXP_TESTS(Name, T, func) \ + using LlvmLibcExp##Name##Test = ExpTest; \ + TEST_F(LlvmLibcExp##Name##Test, SpecialNumbers) { \ + test_special_numbers(&func); \ + } \ + TEST_F(LlvmLibcExp##Name##Test, PositiveRange) { \ + test_positive_range(&func); \ + } \ + TEST_F(LlvmLibcExp##Name##Test, NegativeRange) { \ + test_negative_range(&func); \ + } \ + static_assert(true, "Require semicolon.") diff --git a/libc/test/src/stdfix/exphk_test.cpp b/libc/test/src/stdfix/exphk_test.cpp new file mode 100644 index 00000000000000..24e92dc902faea --- /dev/null +++ b/libc/test/src/stdfix/exphk_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for exphk -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExpTest.h" + +#include "src/stdfix/exphk.h" + +LIST_EXP_TESTS(hk, short accum, LIBC_NAMESPACE::exphk); diff --git a/libc/test/src/stdfix/expk_test.cpp b/libc/test/src/stdfix/expk_test.cpp new file mode 100644 index 00000000000000..bc322037af04a7 --- /dev/null +++ b/libc/test/src/stdfix/expk_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for expk ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExpTest.h" + +#include "src/stdfix/expk.h" + +LIST_EXP_TESTS(k, accum, LIBC_NAMESPACE::expk); From 909ab0e0d1903ad2329ca9fdf248d21330f9437f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Mar 2024 15:03:51 -0800 Subject: [PATCH 110/158] [RISCV] Insert a freeze before converting select to AND/OR. (#84232) Select blocks poison, but AND/OR do not. We need to insert a freeze to block poison propagation. This creates suboptimal codegen which I will try to fix with other patches. I'm prioritizing the correctness fix since we have 2 bug reports. Fixes #84200 and #84350 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +- llvm/test/CodeGen/RISCV/alu64.ll | 3 +- .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 16 +- llvm/test/CodeGen/RISCV/bfloat-convert.ll | 169 +- llvm/test/CodeGen/RISCV/double-convert.ll | 137 +- .../CodeGen/RISCV/double-round-conv-sat.ll | 804 +++++----- llvm/test/CodeGen/RISCV/float-convert.ll | 236 +-- .../CodeGen/RISCV/float-round-conv-sat.ll | 636 ++++---- llvm/test/CodeGen/RISCV/forced-atomics.ll | 5 +- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 350 +++-- llvm/test/CodeGen/RISCV/half-convert.ll | 370 +++-- .../test/CodeGen/RISCV/half-round-conv-sat.ll | 1356 +++++++++-------- llvm/test/CodeGen/RISCV/iabs.ll | 88 +- llvm/test/CodeGen/RISCV/pr84200.ll | 22 + llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 16 +- llvm/test/CodeGen/RISCV/rv32zbs.ll | 40 +- .../RISCV/rv64-legal-i32/rv64xtheadbb.ll | 16 +- .../CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll | 13 +- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 617 ++++---- .../CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 26 +- .../CodeGen/RISCV/signed-truncation-check.ll | 9 +- 21 files changed, 2587 insertions(+), 2356 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr84200.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4713bd605c243b..9b748cdcf74511 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, // (select c, -1, y) -> -c | y if (isAllOnesConstant(TrueV)) { SDValue Neg = DAG.getNegative(CondV, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); + return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, -1) -> (c-1) | y if (isAllOnesConstant(FalseV)) { SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); + return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV)); } // (select c, 0, y) -> (c-1) & y if (isNullConstant(TrueV)) { SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); + return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, 0) -> -c & y if (isNullConstant(FalseV)) { SDValue Neg = DAG.getNegative(CondV, DL, VT); - return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); + return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV)); } } @@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, // (select !x, x, y) -> x & y if (std::optional MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, - FalseV); + DAG.getFreeze(FalseV)); } // (select x, y, x) -> x & y // (select !x, y, x) -> x | y if (std::optional MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { - return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, - FalseV); + return DAG.getNode(*MatchResult ? 
ISD::AND : ISD::OR, DL, VT, + DAG.getFreeze(TrueV), FalseV); } } diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index f032756e007b68..e16f6abcca244c 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind { ; RV32I-LABEL: sltiu: ; RV32I: # %bb.0: ; RV32I-NEXT: sltiu a0, a0, 3 -; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: snez a1, a1 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index aa962d68fc5285..5914e45a153302 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32IA-NEXT: # =>This Loop Header: Depth=1 ; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 ; RV32IA-NEXT: mv a3, a2 -; RV32IA-NEXT: addi a2, a2, 1 -; RV32IA-NEXT: sltu a4, a3, a1 -; RV32IA-NEXT: neg a4, a4 -; RV32IA-NEXT: and a4, a4, a2 +; RV32IA-NEXT: addi a4, a2, 1 +; RV32IA-NEXT: sltu a2, a2, a1 +; RV32IA-NEXT: neg a2, a2 +; RV32IA-NEXT: and a4, a2, a4 ; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start ; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 ; RV64IA-NEXT: mv a3, a2 -; RV64IA-NEXT: addi a2, a2, 1 -; RV64IA-NEXT: sltu a4, a3, a1 -; RV64IA-NEXT: neg a4, a4 -; RV64IA-NEXT: and a4, a4, a2 +; RV64IA-NEXT: addi a4, a2, 1 +; RV64IA-NEXT: sltu a2, a2, a1 +; RV64IA-NEXT: neg a2, a2 +; RV64IA-NEXT: and a4, a2, a4 ; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 diff --git 
a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index d533607ad54e38..0216d00be21854 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind { define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat: ; RV32IZFBFMIN: # %bb.0: # %start -; RV32IZFBFMIN-NEXT: addi sp, sp, -16 -; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: addi sp, sp, -32 +; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg s1, s0 ; RV32IZFBFMIN-NEXT: lui a0, 913408 ; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0 -; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg s3, s2 ; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFBFMIN-NEXT: call __fixsfdi +; RV32IZFBFMIN-NEXT: and a0, s3, a0 +; RV32IZFBFMIN-NEXT: or a0, s1, a0 +; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFBFMIN-NEXT: neg a2, a2 ; RV32IZFBFMIN-NEXT: lui a4, 524288 -; RV32IZFBFMIN-NEXT: lui a2, 524288 -; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2 +; RV32IZFBFMIN-NEXT: li a5, 1 +; RV32IZFBFMIN-NEXT: lui a3, 524288 +; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2 ; RV32IZFBFMIN-NEXT: # %bb.1: # %start -; RV32IZFBFMIN-NEXT: mv a2, a1 +; RV32IZFBFMIN-NEXT: mv a3, a1 ; RV32IZFBFMIN-NEXT: .LBB10_2: # 
%start -; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4 +; RV32IZFBFMIN-NEXT: and a0, a2, a0 +; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4 ; RV32IZFBFMIN-NEXT: # %bb.3: -; RV32IZFBFMIN-NEXT: addi a2, a4, -1 +; RV32IZFBFMIN-NEXT: addi a3, a4, -1 ; RV32IZFBFMIN-NEXT: .LBB10_4: # %start -; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFBFMIN-NEXT: neg a4, a1 -; RV32IZFBFMIN-NEXT: and a1, a4, a2 -; RV32IZFBFMIN-NEXT: neg a2, a3 -; RV32IZFBFMIN-NEXT: neg a3, s0 -; RV32IZFBFMIN-NEXT: and a0, a3, a0 -; RV32IZFBFMIN-NEXT: or a0, a2, a0 -; RV32IZFBFMIN-NEXT: and a0, a4, a0 -; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: and a1, a2, a3 +; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 32 ; RV32IZFBFMIN-NEXT: ret ; ; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat: ; R32IDZFBFMIN: # %bb.0: # %start -; R32IDZFBFMIN-NEXT: addi sp, sp, -16 -; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: addi sp, sp, -32 +; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; 
R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) +; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg s1, s0 ; R32IDZFBFMIN-NEXT: lui a0, 913408 ; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0 -; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg s3, s2 ; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 ; R32IDZFBFMIN-NEXT: call __fixsfdi +; R32IDZFBFMIN-NEXT: and a0, s3, a0 +; R32IDZFBFMIN-NEXT: or a0, s1, a0 +; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0 +; R32IDZFBFMIN-NEXT: neg a2, a2 ; R32IDZFBFMIN-NEXT: lui a4, 524288 -; R32IDZFBFMIN-NEXT: lui a2, 524288 -; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2 +; R32IDZFBFMIN-NEXT: li a5, 1 +; R32IDZFBFMIN-NEXT: lui a3, 524288 +; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2 ; R32IDZFBFMIN-NEXT: # %bb.1: # %start -; R32IDZFBFMIN-NEXT: mv a2, a1 +; R32IDZFBFMIN-NEXT: mv a3, a1 ; R32IDZFBFMIN-NEXT: .LBB10_2: # %start -; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0 -; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4 +; R32IDZFBFMIN-NEXT: and a0, a2, a0 +; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4 ; R32IDZFBFMIN-NEXT: # %bb.3: -; R32IDZFBFMIN-NEXT: addi a2, a4, -1 +; R32IDZFBFMIN-NEXT: addi a3, a4, -1 ; R32IDZFBFMIN-NEXT: .LBB10_4: # %start -; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0 -; R32IDZFBFMIN-NEXT: neg a4, a1 -; R32IDZFBFMIN-NEXT: and a1, a4, a2 -; R32IDZFBFMIN-NEXT: neg a2, a3 -; R32IDZFBFMIN-NEXT: neg a3, s0 -; R32IDZFBFMIN-NEXT: and a0, a3, a0 -; R32IDZFBFMIN-NEXT: or a0, a2, a0 -; R32IDZFBFMIN-NEXT: and a0, a4, a0 -; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: and a1, a2, a3 +; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload 
+; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; R32IDZFBFMIN-NEXT: addi sp, sp, 16 +; R32IDZFBFMIN-NEXT: addi sp, sp, 32 ; R32IDZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_l_bf16_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: addi sp, sp, -16 -; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-NEXT: addi sp, sp, -32 +; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32ID-NEXT: lui a0, %hi(.LCPI10_0) +; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fs0, a0 +; RV32ID-NEXT: flt.s s0, fa5, fs0 +; RV32ID-NEXT: neg s1, s0 ; RV32ID-NEXT: lui a0, 913408 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fle.s s0, fa5, fs0 +; RV32ID-NEXT: fle.s s2, fa5, fs0 +; RV32ID-NEXT: neg s3, s2 ; RV32ID-NEXT: fmv.s fa0, fs0 ; RV32ID-NEXT: call __fixsfdi +; RV32ID-NEXT: and a0, s3, a0 +; RV32ID-NEXT: or a0, s1, a0 +; RV32ID-NEXT: feq.s a2, fs0, fs0 +; RV32ID-NEXT: neg a2, a2 ; RV32ID-NEXT: lui a4, 524288 -; RV32ID-NEXT: lui a2, 524288 -; RV32ID-NEXT: beqz s0, .LBB10_2 +; RV32ID-NEXT: li a5, 1 +; RV32ID-NEXT: lui a3, 524288 +; RV32ID-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-NEXT: # %bb.1: # %start -; RV32ID-NEXT: mv a2, a1 +; RV32ID-NEXT: mv a3, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-NEXT: flt.s a3, fa5, fs0 -; RV32ID-NEXT: beqz a3, .LBB10_4 +; RV32ID-NEXT: and a0, a2, a0 +; RV32ID-NEXT: beqz s0, .LBB10_4 ; RV32ID-NEXT: # %bb.3: -; RV32ID-NEXT: addi a2, a4, -1 +; RV32ID-NEXT: 
addi a3, a4, -1 ; RV32ID-NEXT: .LBB10_4: # %start -; RV32ID-NEXT: feq.s a1, fs0, fs0 -; RV32ID-NEXT: neg a4, a1 -; RV32ID-NEXT: and a1, a4, a2 -; RV32ID-NEXT: neg a2, a3 -; RV32ID-NEXT: neg a3, s0 -; RV32ID-NEXT: and a0, a3, a0 -; RV32ID-NEXT: or a0, a2, a0 -; RV32ID-NEXT: and a0, a4, a0 -; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ID-NEXT: and a1, a2, a3 +; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: addi sp, sp, 32 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat: @@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-NEXT: neg s0, a0 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero ; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0 -; CHECK32ZFBFMIN-NEXT: neg s1, a0 +; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1 +; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1 ; CHECK32ZFBFMIN-NEXT: call __fixunssfdi ; CHECK32ZFBFMIN-NEXT: and a0, s1, a0 ; CHECK32ZFBFMIN-NEXT: or a0, s0, a0 @@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: xori a0, a0, 1 +; RV32ID-NEXT: addi s1, a0, -1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: or a0, s0, a0 diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index eb8ffe75ef7697..f2e37f55521bac 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -749,40 +749,47 @@ define i64 @fcvt_l_d(double %a) nounwind { define i64 @fcvt_l_d_sat(double %a) nounwind { ; 
RV32IFD-LABEL: fcvt_l_d_sat: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: lui a0, %hi(.LCPI12_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI12_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI12_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB12_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB12_2 ; RV32IFD-NEXT: # %bb.1: # %start -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB12_2: # %start -; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB12_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB12_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB12_4: # %start -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: 
lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_l_d_sat: @@ -800,40 +807,45 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) -; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 +; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 ; 
RV32IZFINXZDINX-NEXT: lui a5, 524288 -; RV32IZFINXZDINX-NEXT: lui a3, 524288 -; RV32IZFINXZDINX-NEXT: beqz a2, .LBB12_2 +; RV32IZFINXZDINX-NEXT: li a6, 1 +; RV32IZFINXZDINX-NEXT: lui a4, 524288 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB12_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: # %start -; RV32IZFINXZDINX-NEXT: mv a3, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI12_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI12_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI12_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a4, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a4, .LBB12_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB12_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a3, a5, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB12_4: # %start -; RV32IZFINXZDINX-NEXT: feq.d a1, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: and a1, a5, a3 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a4 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1013,23 +1025,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0) +; 
RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi -; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, fa5, fs0 -; RV32IFD-NEXT: neg a2, a2 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -1054,11 +1066,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero -; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0) -; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI14_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 ; RV32IZFINXZDINX-NEXT: neg a3, a3 diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index b8c6e84502408f..ff2d8e00630071 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ 
b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -50,41 +50,48 @@ define signext i32 @test_floor_si32(double %x) { define i64 @test_floor_si64(double %x) nounwind { ; RV32IFD-LABEL: test_floor_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call floor ; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI1_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB1_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB1_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB1_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB1_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB1_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB1_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; 
RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_floor_si64: @@ -101,44 +108,47 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call floor -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_1) +; RV32IZFINXZDINX-NEXT: lw a3, 
%lo(.LCPI1_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB1_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB1_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB1_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI1_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI1_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB1_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB1_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -208,7 +218,8 @@ define i64 
@test_floor_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -235,29 +246,28 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call floor ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI3_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI3_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI3_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload 
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -316,41 +326,48 @@ define signext i32 @test_ceil_si32(double %x) { define i64 @test_ceil_si64(double %x) nounwind { ; RV32IFD-LABEL: test_ceil_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call ceil ; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI5_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB5_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB5_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB5_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB5_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB5_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, 
a4, -1 ; RV32IFD-NEXT: .LBB5_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_ceil_si64: @@ -367,44 +384,47 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call ceil -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg 
s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB5_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB5_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB5_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI5_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI5_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB5_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB5_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded 
Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -474,7 +494,8 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -501,29 +522,28 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call ceil ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI7_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI7_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI7_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; 
RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -582,41 +602,48 @@ define signext i32 @test_trunc_si32(double %x) { define i64 @test_trunc_si64(double %x) nounwind { ; RV32IFD-LABEL: test_trunc_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call trunc ; RV32IFD-NEXT: lui a0, %hi(.LCPI9_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI9_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB9_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB9_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB9_4 +; RV32IFD-NEXT: and a0, 
a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB9_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB9_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_trunc_si64: @@ -633,44 +660,47 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call trunc -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, 
%lo(.LCPI9_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB9_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB9_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB9_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI9_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI9_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB9_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB9_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte 
Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -740,7 +770,8 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -767,29 +798,28 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call trunc ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI11_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI11_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI11_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg 
a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -848,41 +878,48 @@ define signext i32 @test_round_si32(double %x) { define i64 @test_round_si64(double %x) nounwind { ; RV32IFD-LABEL: test_round_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call round ; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI13_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB13_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB13_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB13_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI13_1) -; 
RV32IFD-NEXT: fld fa5, %lo(.LCPI13_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB13_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB13_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB13_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_round_si64: @@ -899,44 +936,47 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call round -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw 
s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB13_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB13_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB13_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI13_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI13_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB13_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB13_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; 
RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1006,7 +1046,8 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1033,29 +1074,28 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call round ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI15_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI15_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI15_0+4)(a3) +; 
RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1114,41 +1154,48 @@ define signext i32 @test_roundeven_si32(double %x) { define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IFD-LABEL: test_roundeven_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call roundeven ; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI17_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB17_2 +; RV32IFD-NEXT: li a5, 1 +; 
RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB17_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB17_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB17_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB17_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB17_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_roundeven_si64: @@ -1165,44 +1212,47 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call roundeven -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw 
a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB17_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB17_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB17_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI17_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI17_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB17_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB17_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or 
a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1272,7 +1322,8 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1299,29 +1350,28 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call roundeven ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI19_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; 
RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI19_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI19_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1380,41 +1430,48 @@ define signext i32 @test_rint_si32(double %x) { define i64 @test_rint_si64(double %x) nounwind { ; RV32IFD-LABEL: test_rint_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call rint ; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI21_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI21_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or 
a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB21_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB21_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB21_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB21_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB21_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB21_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_rint_si64: @@ -1431,44 +1488,47 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 
16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB21_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB21_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB21_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg 
a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1538,7 +1598,8 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1565,29 +1626,28 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; 
RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI23_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index f1e444b5b624b4..1a0e4e18291158 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -275,24 +275,26 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 
-; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -613,40 +615,47 @@ define i64 @fcvt_l_s(float %a) nounwind { define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-LABEL: fcvt_l_s_sat: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: flt.s s0, fa5, fa0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 -; RV32IF-NEXT: fle.s s0, fa5, fa0 +; RV32IF-NEXT: fle.s s2, fa5, fa0 +; RV32IF-NEXT: neg s3, s2 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: and a0, s3, a0 +; RV32IF-NEXT: or a0, s1, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB12_2 +; RV32IF-NEXT: li a5, 1 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: bne s2, a5, .LBB12_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a3, a1 ; RV32IF-NEXT: .LBB12_2: # %start -; RV32IF-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IF-NEXT: 
flw fa5, %lo(.LCPI12_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: beqz a3, .LBB12_4 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: beqz s0, .LBB12_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a3, a4, -1 ; RV32IF-NEXT: .LBB12_4: # %start -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: and a1, a2, a3 +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_l_s_sat: @@ -664,35 +673,38 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 913408 ; RV32IZFINX-NEXT: fle.s s1, a0, s0 +; RV32IZFINX-NEXT: neg s2, s1 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixsfdi +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI12_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) +; RV32IZFINX-NEXT: and a0, s2, a0 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: feq.s a2, s0, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: lui a5, 524288 +; RV32IZFINX-NEXT: li a6, 1 ; 
RV32IZFINX-NEXT: lui a4, 524288 -; RV32IZFINX-NEXT: lui a2, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB12_2 +; RV32IZFINX-NEXT: bne s1, a6, .LBB12_2 ; RV32IZFINX-NEXT: # %bb.1: # %start -; RV32IZFINX-NEXT: mv a2, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB12_2: # %start -; RV32IZFINX-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IZFINX-NEXT: lw a1, %lo(.LCPI12_0)(a1) -; RV32IZFINX-NEXT: flt.s a3, a1, s0 +; RV32IZFINX-NEXT: and a0, a2, a0 ; RV32IZFINX-NEXT: beqz a3, .LBB12_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: addi a2, a4, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB12_4: # %start -; RV32IZFINX-NEXT: feq.s a1, s0, s0 -; RV32IZFINX-NEXT: neg a4, a1 -; RV32IZFINX-NEXT: and a1, a4, a2 -; RV32IZFINX-NEXT: neg a2, s1 -; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: neg a2, a3 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a0, a4, a0 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -863,23 +875,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI14_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0) +; RV32IF-NEXT: flt.s a0, fa5, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x fa5, zero ; RV32IF-NEXT: fle.s a0, fa5, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: addi s1, a0, -1 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; 
RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -898,19 +910,18 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: mv s0, a0 -; RV32IZFINX-NEXT: fle.s a0, zero, a0 -; RV32IZFINX-NEXT: neg s1, a0 -; RV32IZFINX-NEXT: mv a0, s0 +; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0) +; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1) +; RV32IZFINX-NEXT: flt.s a1, a1, a0 +; RV32IZFINX-NEXT: neg s0, a1 +; RV32IZFINX-NEXT: fle.s a1, zero, a0 +; RV32IZFINX-NEXT: xori a1, a1, 1 +; RV32IZFINX-NEXT: addi s1, a1, -1 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2) ; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: or a0, s0, a0 ; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: or a1, s0, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -928,36 +939,33 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; ; RV32I-LABEL: fcvt_lu_s_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 391168 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 ; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: and s3, s2, a0 -; RV32I-NEXT: lui a1, 391168 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a1, a0 -; RV32I-NEXT: or a0, a1, s3 -; RV32I-NEXT: and a2, s2, s1 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: or a1, s1, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_lu_s_sat: @@ -966,24 +974,26 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte 
Folded Spill ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lui a1, 391168 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: call __gtsf2 +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: neg s1, a0 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2 ; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: addi s2, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunssfdi -; RV64I-NEXT: and s1, s1, a0 -; RV64I-NEXT: lui a1, 391168 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __gtsf2 -; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: or a0, a0, s1 +; RV64I-NEXT: and a0, s2, a0 +; RV64I-NEXT: or a0, s1, a0 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: @@ -2089,24 +2099,26 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded 
Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 5e99c7eb905628..f91aac11876d41 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -37,7 +37,8 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -52,32 +53,34 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB1_4 +; RV32IF-NEXT: bne s0, a6, .LBB1_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB1_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB1_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 
-; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -115,23 +118,24 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB1_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB1_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB1_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB1_6 +; RV32IZFINX-NEXT: beqz a3, .LBB1_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB1_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -180,8 +184,7 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; 
RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -193,22 +196,22 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rdn ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB3_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -226,7 +229,6 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -237,21 +239,21 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rdn ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB3_2: -; 
RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -297,7 +299,8 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -312,32 +315,34 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; 
RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB5_4 +; RV32IF-NEXT: bne s0, a6, .LBB5_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB5_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB5_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -375,23 +380,24 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB5_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB5_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, 
.LBB5_6 +; RV32IZFINX-NEXT: beqz a3, .LBB5_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB5_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -440,8 +446,7 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -453,22 +458,22 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rup ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB7_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 
8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -486,7 +491,6 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -497,21 +501,21 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rup ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB7_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -557,7 +561,8 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; 
RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -572,32 +577,34 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB9_4 +; RV32IF-NEXT: bne s0, a6, .LBB9_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB9_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB9_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -635,23 +642,24 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZFINX-NEXT: 
and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB9_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB9_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB9_6 +; RV32IZFINX-NEXT: beqz a3, .LBB9_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB9_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -700,8 +708,7 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -713,22 +720,22 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rtz ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB11_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, 
a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -746,7 +753,6 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -757,21 +763,21 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rtz ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB11_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, 
a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -817,7 +823,8 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -832,32 +839,34 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB13_4 +; RV32IF-NEXT: bne s0, a6, .LBB13_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB13_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB13_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB13_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: 
neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -895,23 +904,24 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB13_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB13_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB13_6 +; RV32IZFINX-NEXT: beqz a3, .LBB13_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB13_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -960,8 +970,7 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: 
fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -973,22 +982,22 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rmm ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB15_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1006,7 +1015,6 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1017,21 +1025,21 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rmm ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB15_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg 
s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -1077,7 +1085,8 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1092,32 +1101,34 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: 
lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB17_4 +; RV32IF-NEXT: bne s0, a6, .LBB17_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB17_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB17_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB17_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1155,23 +1166,24 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB17_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB17_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB17_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB17_6 +; 
RV32IZFINX-NEXT: beqz a3, .LBB17_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB17_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1220,8 +1232,7 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1233,22 +1244,22 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rne ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB19_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: 
flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1266,7 +1277,6 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1277,21 +1287,21 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rne ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB19_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI19_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -1337,7 +1347,8 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 
4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1352,32 +1363,34 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB21_4 +; RV32IF-NEXT: bne s0, a6, .LBB21_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB21_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB21_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB21_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1415,23 +1428,24 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINX-NEXT: lw a2, 
%lo(.LCPI21_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB21_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB21_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB21_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB21_6 +; RV32IZFINX-NEXT: beqz a3, .LBB21_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB21_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1480,8 +1494,7 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1493,22 +1506,22 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0 ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB23_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; 
RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1526,7 +1539,6 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1537,21 +1549,21 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0 ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB23_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI23_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; 
RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index f6a53a9d76dd35..2b198afb47a9ae 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -3567,8 +3567,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: neg a3, a0 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 @@ -3672,7 +3672,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: .LBB52_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: seqz a2, a1 +; RV32-NEXT: snez a2, a1 +; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: mv a2, a4 ; RV32-NEXT: bnez a0, .LBB52_1 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 9e93ad0043a7e0..6bfacc3e9814b4 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -115,7 +115,8 @@ define i32 @utest_f64i32(double %x) { ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixunsdfdi ; RV32IF-NEXT: sltiu a2, a0, -1 -; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: snez a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: or a0, a1, a0 @@ -430,7 +431,8 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: call __fixunssfdi ; 
RV32-NEXT: sltiu a2, a0, -1 -; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: or a0, a1, a0 @@ -1043,8 +1045,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a1, 12(sp) ; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: lui a3, 524288 @@ -1052,25 +1054,25 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: beq a1, a5, .LBB18_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: bnez a7, .LBB18_3 ; RV32IF-NEXT: j .LBB18_4 ; RV32IF-NEXT: .LBB18_2: ; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: beqz a7, .LBB18_4 ; RV32IF-NEXT: .LBB18_3: # %entry -; RV32IF-NEXT: slti a6, a0, 0 +; RV32IF-NEXT: slti a6, a2, 0 ; RV32IF-NEXT: .LBB18_4: # %entry -; RV32IF-NEXT: neg a7, a6 -; RV32IF-NEXT: addi t0, a6, -1 +; RV32IF-NEXT: addi a7, a6, -1 +; RV32IF-NEXT: neg t0, a6 ; RV32IF-NEXT: bnez a6, .LBB18_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB18_6: # %entry -; RV32IF-NEXT: or a4, t0, a4 -; RV32IF-NEXT: and a5, a7, a0 -; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: and a5, t0, a0 ; RV32IF-NEXT: beq a1, a3, .LBB18_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: sltu a0, a3, a1 @@ -1078,11 +1080,11 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: .LBB18_8: ; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB18_9: # %entry -; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: and a5, a5, a2 ; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a2, a3, .LBB18_11 +; RV32IF-NEXT: beq a5, a3, .LBB18_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti 
a0, a5, 0 +; RV32IF-NEXT: slti a0, a2, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB18_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB18_13 @@ -1142,8 +1144,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: lui a3, 524288 @@ -1151,25 +1153,25 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: beq a1, a5, .LBB18_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: bnez a7, .LBB18_3 ; RV32IFD-NEXT: j .LBB18_4 ; RV32IFD-NEXT: .LBB18_2: ; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: beqz a7, .LBB18_4 ; RV32IFD-NEXT: .LBB18_3: # %entry -; RV32IFD-NEXT: slti a6, a0, 0 +; RV32IFD-NEXT: slti a6, a2, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry -; RV32IFD-NEXT: neg a7, a6 -; RV32IFD-NEXT: addi t0, a6, -1 +; RV32IFD-NEXT: addi a7, a6, -1 +; RV32IFD-NEXT: neg t0, a6 ; RV32IFD-NEXT: bnez a6, .LBB18_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB18_6: # %entry -; RV32IFD-NEXT: or a4, t0, a4 -; RV32IFD-NEXT: and a5, a7, a0 -; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: and a5, t0, a0 ; RV32IFD-NEXT: beq a1, a3, .LBB18_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; RV32IFD-NEXT: sltu a0, a3, a1 @@ -1177,11 +1179,11 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB18_8: ; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB18_9: # %entry -; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: and a5, a5, a2 ; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a2, a3, .LBB18_11 +; RV32IFD-NEXT: beq a5, a3, .LBB18_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; 
RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: slti a0, a2, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB18_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB18_13 @@ -1227,8 +1229,10 @@ define i64 @utest_f64i64(double %x) { ; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: seqz a0, a0 @@ -1267,8 +1271,10 @@ define i64 @utest_f64i64(double %x) { ; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: seqz a0, a0 @@ -1440,8 +1446,8 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -1449,25 +1455,25 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB21_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, 
.LBB21_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB21_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -1475,11 +1481,11 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB21_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB21_11 +; RV32-NEXT: beq a5, a3, .LBB21_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB21_11: # %entry ; RV32-NEXT: bnez a0, .LBB21_13 @@ -1523,8 +1529,10 @@ define i64 @utest_f32i64(float %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -1657,8 +1665,8 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -1666,25 +1674,25 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry 
-; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB24_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB24_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB24_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -1692,11 +1700,11 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: .LBB24_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB24_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB24_11 +; RV32-NEXT: beq a5, a3, .LBB24_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB24_11: # %entry ; RV32-NEXT: bnez a0, .LBB24_13 @@ -1772,8 +1780,10 @@ define i64 @utesth_f16i64(half %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -2891,8 +2901,8 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a1, 12(sp) ; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: lui a3, 524288 @@ -2900,25 +2910,25 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: beq a1, a5, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a2, a0 +; 
RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: bnez a7, .LBB45_3 ; RV32IF-NEXT: j .LBB45_4 ; RV32IF-NEXT: .LBB45_2: ; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: beqz a7, .LBB45_4 ; RV32IF-NEXT: .LBB45_3: # %entry -; RV32IF-NEXT: slti a6, a0, 0 +; RV32IF-NEXT: slti a6, a2, 0 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: neg a7, a6 -; RV32IF-NEXT: addi t0, a6, -1 +; RV32IF-NEXT: addi a7, a6, -1 +; RV32IF-NEXT: neg t0, a6 ; RV32IF-NEXT: bnez a6, .LBB45_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: or a4, t0, a4 -; RV32IF-NEXT: and a5, a7, a0 -; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: and a5, t0, a0 ; RV32IF-NEXT: beq a1, a3, .LBB45_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: sltu a0, a3, a1 @@ -2926,11 +2936,11 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: .LBB45_8: ; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: and a5, a5, a2 ; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a2, a3, .LBB45_11 +; RV32IF-NEXT: beq a5, a3, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: slti a0, a2, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB45_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB45_13 @@ -2990,8 +3000,8 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: lui a3, 524288 @@ -2999,25 +3009,25 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: beq a1, a5, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, 
a0, a2 ; RV32IFD-NEXT: bnez a7, .LBB45_3 ; RV32IFD-NEXT: j .LBB45_4 ; RV32IFD-NEXT: .LBB45_2: ; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: beqz a7, .LBB45_4 ; RV32IFD-NEXT: .LBB45_3: # %entry -; RV32IFD-NEXT: slti a6, a0, 0 +; RV32IFD-NEXT: slti a6, a2, 0 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: neg a7, a6 -; RV32IFD-NEXT: addi t0, a6, -1 +; RV32IFD-NEXT: addi a7, a6, -1 +; RV32IFD-NEXT: neg t0, a6 ; RV32IFD-NEXT: bnez a6, .LBB45_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: or a4, t0, a4 -; RV32IFD-NEXT: and a5, a7, a0 -; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: and a5, t0, a0 ; RV32IFD-NEXT: beq a1, a3, .LBB45_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; RV32IFD-NEXT: sltu a0, a3, a1 @@ -3025,11 +3035,11 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .LBB45_8: ; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: and a5, a5, a2 ; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a2, a3, .LBB45_11 +; RV32IFD-NEXT: beq a5, a3, .LBB45_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: slti a0, a2, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB45_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB45_13 @@ -3073,8 +3083,10 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: seqz a0, a0 @@ -3113,8 +3125,10 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; 
RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: seqz a0, a0 @@ -3144,30 +3158,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a1, 8(sp) +; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a2, .LBB47_2 +; RV32IF-NEXT: beqz a0, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a4, a2, 0 +; RV32IF-NEXT: slti a4, a0, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: ; RV32IF-NEXT: seqz a4, a3 ; RV32IF-NEXT: .LBB47_3: # %entry ; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a2 +; RV32IF-NEXT: or a3, a3, a0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a3, a3, a4 ; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: and a2, a3, a2 ; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: and a2, a3, a2 -; RV32IF-NEXT: slti a2, a2, 0 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: slti a0, a0, 0 +; RV32IF-NEXT: addi a3, a0, -1 +; RV32IF-NEXT: and a0, a3, a1 +; RV32IF-NEXT: and a1, a3, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -3202,30 +3216,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw a1, 8(sp) +; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a2, .LBB47_2 +; 
RV32IFD-NEXT: beqz a0, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a4, a2, 0 +; RV32IFD-NEXT: slti a4, a0, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: ; RV32IFD-NEXT: seqz a4, a3 ; RV32IFD-NEXT: .LBB47_3: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a2 +; RV32IFD-NEXT: or a3, a3, a0 ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a3, a3, a4 ; RV32IFD-NEXT: neg a3, a3 +; RV32IFD-NEXT: and a2, a3, a2 ; RV32IFD-NEXT: and a1, a3, a1 ; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: and a2, a3, a2 -; RV32IFD-NEXT: slti a2, a2, 0 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: and a0, a2, a0 -; RV32IFD-NEXT: and a1, a2, a1 +; RV32IFD-NEXT: slti a0, a0, 0 +; RV32IFD-NEXT: addi a3, a0, -1 +; RV32IFD-NEXT: and a0, a3, a1 +; RV32IFD-NEXT: and a1, a3, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3246,8 +3260,8 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -3255,25 +3269,25 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: beq a1, a5, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB48_3 ; RV32-NEXT: j .LBB48_4 ; RV32-NEXT: .LBB48_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB48_4 ; RV32-NEXT: .LBB48_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB48_6 ; RV32-NEXT: # %bb.5: # %entry ; 
RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB48_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -3281,11 +3295,11 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .LBB48_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB48_11 +; RV32-NEXT: beq a5, a3, .LBB48_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB48_11: # %entry ; RV32-NEXT: bnez a0, .LBB48_13 @@ -3327,8 +3341,10 @@ define i64 @utest_f32i64_mm(float %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -3370,30 +3386,30 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB50_2 +; RV32-NEXT: beqz a0, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB50_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and 
a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3437,8 +3453,8 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -3446,25 +3462,25 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: beq a1, a5, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB51_3 ; RV32-NEXT: j .LBB51_4 ; RV32-NEXT: .LBB51_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB51_4 ; RV32-NEXT: .LBB51_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB51_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB51_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -3472,11 +3488,11 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: .LBB51_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; 
RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB51_11 +; RV32-NEXT: beq a5, a3, .LBB51_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB51_11: # %entry ; RV32-NEXT: bnez a0, .LBB51_13 @@ -3550,8 +3566,10 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -3595,30 +3613,30 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB53_2 +; RV32-NEXT: beqz a0, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB53_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll 
index daaceed3941c53..518cd7da2ab771 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -2145,41 +2145,48 @@ define i64 @fcvt_l_h(half %a) nounwind { define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_l_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: addi sp, sp, -32 +; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: flt.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 -; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: fle.s s2, fa5, fs0 +; RV32IZFH-NEXT: neg s3, s2 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: and a0, s3, a0 +; RV32IZFH-NEXT: or a0, s1, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB10_2 +; RV32IZFH-NEXT: li a5, 1 +; RV32IZFH-NEXT: lui a3, 524288 +; RV32IZFH-NEXT: bne s2, a5, .LBB10_2 ; RV32IZFH-NEXT: # %bb.1: # %start -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a3, a1 ; RV32IZFH-NEXT: .LBB10_2: # %start -; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB10_4 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: beqz s0, .LBB10_4 ; RV32IZFH-NEXT: # %bb.3: 
-; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: addi a3, a4, -1 ; RV32IZFH-NEXT: .LBB10_4: # %start -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: and a1, a2, a3 +; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 32 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_l_h_sat: @@ -2193,41 +2200,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFH-LABEL: fcvt_l_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: addi sp, sp, -32 +; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: flt.s s0, fa5, fs0 +; RV32IDZFH-NEXT: neg s1, s0 ; RV32IDZFH-NEXT: lui a0, 913408 ; RV32IDZFH-NEXT: fmv.w.x fa5, a0 -; RV32IDZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IDZFH-NEXT: fle.s s2, fa5, fs0 +; 
RV32IDZFH-NEXT: neg s3, s2 ; RV32IDZFH-NEXT: fmv.s fa0, fs0 ; RV32IDZFH-NEXT: call __fixsfdi +; RV32IDZFH-NEXT: and a0, s3, a0 +; RV32IDZFH-NEXT: or a0, s1, a0 +; RV32IDZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFH-NEXT: neg a2, a2 ; RV32IDZFH-NEXT: lui a4, 524288 -; RV32IDZFH-NEXT: lui a2, 524288 -; RV32IDZFH-NEXT: beqz s0, .LBB10_2 +; RV32IDZFH-NEXT: li a5, 1 +; RV32IDZFH-NEXT: lui a3, 524288 +; RV32IDZFH-NEXT: bne s2, a5, .LBB10_2 ; RV32IDZFH-NEXT: # %bb.1: # %start -; RV32IDZFH-NEXT: mv a2, a1 +; RV32IDZFH-NEXT: mv a3, a1 ; RV32IDZFH-NEXT: .LBB10_2: # %start -; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IDZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IDZFH-NEXT: beqz a3, .LBB10_4 +; RV32IDZFH-NEXT: and a0, a2, a0 +; RV32IDZFH-NEXT: beqz s0, .LBB10_4 ; RV32IDZFH-NEXT: # %bb.3: -; RV32IDZFH-NEXT: addi a2, a4, -1 +; RV32IDZFH-NEXT: addi a3, a4, -1 ; RV32IDZFH-NEXT: .LBB10_4: # %start -; RV32IDZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IDZFH-NEXT: neg a4, a1 -; RV32IDZFH-NEXT: and a1, a4, a2 -; RV32IDZFH-NEXT: neg a2, a3 -; RV32IDZFH-NEXT: neg a3, s0 -; RV32IDZFH-NEXT: and a0, a3, a0 -; RV32IDZFH-NEXT: or a0, a2, a0 -; RV32IDZFH-NEXT: and a0, a4, a0 -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: and a1, a2, a3 +; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: addi sp, sp, 32 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_l_h_sat: @@ -2263,8 +2277,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: li a5, 
1 ; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s3, .LBB10_2 +; RV32IZHINX-NEXT: bne s3, a5, .LBB10_2 ; RV32IZHINX-NEXT: # %bb.1: # %start ; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB10_2: # %start @@ -2316,8 +2331,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZDINXZHINX-NEXT: neg a2, a2 ; RV32IZDINXZHINX-NEXT: lui a4, 524288 +; RV32IZDINXZHINX-NEXT: li a5, 1 ; RV32IZDINXZHINX-NEXT: lui a3, 524288 -; RV32IZDINXZHINX-NEXT: beqz s3, .LBB10_2 +; RV32IZDINXZHINX-NEXT: bne s3, a5, .LBB10_2 ; RV32IZDINXZHINX-NEXT: # %bb.1: # %start ; RV32IZDINXZHINX-NEXT: mv a3, a1 ; RV32IZDINXZHINX-NEXT: .LBB10_2: # %start @@ -2448,42 +2464,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32ID-ILP32-LABEL: fcvt_l_h_sat: ; RV32ID-ILP32: # %bb.0: # %start -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: addi sp, sp, -32 +; RV32ID-ILP32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 +; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0) +; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1) ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: fsw fa4, 8(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: flt.s s0, fa5, fa4 +; RV32ID-ILP32-NEXT: neg s1, s0 ; RV32ID-ILP32-NEXT: lui a1, 913408 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: fsw fa4, 4(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: fle.s s0, fa5, fa4 +; RV32ID-ILP32-NEXT: fle.s s2, fa5, fa4 +; RV32ID-ILP32-NEXT: neg s3, s2 ; RV32ID-ILP32-NEXT: call __fixsfdi +; RV32ID-ILP32-NEXT: and a0, s3, a0 +; RV32ID-ILP32-NEXT: or a0, s1, a0 +; RV32ID-ILP32-NEXT: flw fa5, 
8(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: feq.s a2, fa5, fa5 +; RV32ID-ILP32-NEXT: neg a2, a2 ; RV32ID-ILP32-NEXT: lui a4, 524288 -; RV32ID-ILP32-NEXT: lui a2, 524288 -; RV32ID-ILP32-NEXT: beqz s0, .LBB10_2 +; RV32ID-ILP32-NEXT: li a5, 1 +; RV32ID-ILP32-NEXT: lui a3, 524288 +; RV32ID-ILP32-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-ILP32-NEXT: # %bb.1: # %start -; RV32ID-ILP32-NEXT: mv a2, a1 +; RV32ID-ILP32-NEXT: mv a3, a1 ; RV32ID-ILP32-NEXT: .LBB10_2: # %start -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-ILP32-NEXT: flw fa4, 4(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: flt.s a3, fa5, fa4 -; RV32ID-ILP32-NEXT: fmv.s fa5, fa4 -; RV32ID-ILP32-NEXT: beqz a3, .LBB10_4 +; RV32ID-ILP32-NEXT: and a0, a2, a0 +; RV32ID-ILP32-NEXT: beqz s0, .LBB10_4 ; RV32ID-ILP32-NEXT: # %bb.3: -; RV32ID-ILP32-NEXT: addi a2, a4, -1 +; RV32ID-ILP32-NEXT: addi a3, a4, -1 ; RV32ID-ILP32-NEXT: .LBB10_4: # %start -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: neg a4, a1 -; RV32ID-ILP32-NEXT: and a1, a4, a2 -; RV32ID-ILP32-NEXT: neg a2, a3 -; RV32ID-ILP32-NEXT: neg a3, s0 -; RV32ID-ILP32-NEXT: and a0, a3, a0 -; RV32ID-ILP32-NEXT: or a0, a2, a0 -; RV32ID-ILP32-NEXT: and a0, a4, a0 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 +; RV32ID-ILP32-NEXT: and a1, a2, a3 +; RV32ID-ILP32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: addi sp, sp, 32 ; RV32ID-ILP32-NEXT: ret ; ; RV64ID-LP64-LABEL: fcvt_l_h_sat: @@ -2503,41 +2525,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32ID-LABEL: fcvt_l_h_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: addi sp, sp, 
-16 -; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-NEXT: addi sp, sp, -32 +; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 +; RV32ID-NEXT: lui a0, %hi(.LCPI10_0) +; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32ID-NEXT: fmv.s fs0, fa0 +; RV32ID-NEXT: flt.s s0, fa5, fa0 +; RV32ID-NEXT: neg s1, s0 ; RV32ID-NEXT: lui a0, 913408 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fle.s s0, fa5, fa0 +; RV32ID-NEXT: fle.s s2, fa5, fa0 +; RV32ID-NEXT: neg s3, s2 ; RV32ID-NEXT: call __fixsfdi +; RV32ID-NEXT: and a0, s3, a0 +; RV32ID-NEXT: or a0, s1, a0 +; RV32ID-NEXT: feq.s a2, fs0, fs0 +; RV32ID-NEXT: neg a2, a2 ; RV32ID-NEXT: lui a4, 524288 -; RV32ID-NEXT: lui a2, 524288 -; RV32ID-NEXT: beqz s0, .LBB10_2 +; RV32ID-NEXT: li a5, 1 +; RV32ID-NEXT: lui a3, 524288 +; RV32ID-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-NEXT: # %bb.1: # %start -; RV32ID-NEXT: mv a2, a1 +; RV32ID-NEXT: mv a3, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-NEXT: flt.s a3, fa5, fs0 -; RV32ID-NEXT: beqz a3, .LBB10_4 +; RV32ID-NEXT: and a0, a2, a0 +; RV32ID-NEXT: beqz s0, .LBB10_4 ; RV32ID-NEXT: # %bb.3: -; RV32ID-NEXT: addi a2, a4, -1 +; RV32ID-NEXT: addi a3, a4, -1 ; RV32ID-NEXT: .LBB10_4: # %start -; RV32ID-NEXT: feq.s a1, fs0, fs0 -; RV32ID-NEXT: neg a4, a1 -; RV32ID-NEXT: and a1, a4, a2 -; RV32ID-NEXT: neg a2, a3 -; RV32ID-NEXT: neg a3, s0 -; RV32ID-NEXT: and a0, a3, a0 -; RV32ID-NEXT: or a0, a2, a0 -; RV32ID-NEXT: and a0, a4, a0 -; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ID-NEXT: and a1, a2, a3 +; 
RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: addi sp, sp, 32 ; RV32ID-NEXT: ret ; ; RV64ID-LABEL: fcvt_l_h_sat: @@ -2556,41 +2585,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IFZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IFZFHMIN: # %bb.0: # %start -; RV32IFZFHMIN-NEXT: addi sp, sp, -16 -; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: addi sp, sp, -32 +; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg s1, s0 ; RV32IFZFHMIN-NEXT: lui a0, 913408 ; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IFZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg s3, s2 ; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IFZFHMIN-NEXT: call __fixsfdi +; RV32IFZFHMIN-NEXT: and a0, s3, a0 +; RV32IFZFHMIN-NEXT: or a0, s1, a0 +; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IFZFHMIN-NEXT: neg a2, a2 ; RV32IFZFHMIN-NEXT: lui a4, 524288 -; RV32IFZFHMIN-NEXT: lui a2, 524288 -; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2 +; RV32IFZFHMIN-NEXT: li a5, 1 +; RV32IFZFHMIN-NEXT: lui a3, 524288 +; RV32IFZFHMIN-NEXT: bne s2, 
a5, .LBB10_2 ; RV32IFZFHMIN-NEXT: # %bb.1: # %start -; RV32IFZFHMIN-NEXT: mv a2, a1 +; RV32IFZFHMIN-NEXT: mv a3, a1 ; RV32IFZFHMIN-NEXT: .LBB10_2: # %start -; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4 +; RV32IFZFHMIN-NEXT: and a0, a2, a0 +; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4 ; RV32IFZFHMIN-NEXT: # %bb.3: -; RV32IFZFHMIN-NEXT: addi a2, a4, -1 +; RV32IFZFHMIN-NEXT: addi a3, a4, -1 ; RV32IFZFHMIN-NEXT: .LBB10_4: # %start -; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IFZFHMIN-NEXT: neg a4, a1 -; RV32IFZFHMIN-NEXT: and a1, a4, a2 -; RV32IFZFHMIN-NEXT: neg a2, a3 -; RV32IFZFHMIN-NEXT: neg a3, s0 -; RV32IFZFHMIN-NEXT: and a0, a3, a0 -; RV32IFZFHMIN-NEXT: or a0, a2, a0 -; RV32IFZFHMIN-NEXT: and a0, a4, a0 -; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: and a1, a2, a3 +; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 32 ; RV32IFZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat: @@ -2605,41 +2641,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IDZFHMIN: # %bb.0: # %start -; RV32IDZFHMIN-NEXT: addi sp, sp, -16 -; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: addi sp, sp, -32 +; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded 
Spill +; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg s1, s0 ; RV32IDZFHMIN-NEXT: lui a0, 913408 ; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg s3, s2 ; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IDZFHMIN-NEXT: call __fixsfdi +; RV32IDZFHMIN-NEXT: and a0, s3, a0 +; RV32IDZFHMIN-NEXT: or a0, s1, a0 +; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFHMIN-NEXT: neg a2, a2 ; RV32IDZFHMIN-NEXT: lui a4, 524288 -; RV32IDZFHMIN-NEXT: lui a2, 524288 -; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2 +; RV32IDZFHMIN-NEXT: li a5, 1 +; RV32IDZFHMIN-NEXT: lui a3, 524288 +; RV32IDZFHMIN-NEXT: bne s2, a5, .LBB10_2 ; RV32IDZFHMIN-NEXT: # %bb.1: # %start -; RV32IDZFHMIN-NEXT: mv a2, a1 +; RV32IDZFHMIN-NEXT: mv a3, a1 ; RV32IDZFHMIN-NEXT: .LBB10_2: # %start -; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4 +; RV32IDZFHMIN-NEXT: and a0, a2, a0 +; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4 ; RV32IDZFHMIN-NEXT: # %bb.3: -; RV32IDZFHMIN-NEXT: addi a2, a4, -1 +; RV32IDZFHMIN-NEXT: addi a3, a4, -1 ; RV32IDZFHMIN-NEXT: .LBB10_4: # %start -; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IDZFHMIN-NEXT: neg a4, a1 -; RV32IDZFHMIN-NEXT: and a1, a4, a2 -; RV32IDZFHMIN-NEXT: neg a2, a3 -; RV32IDZFHMIN-NEXT: neg a3, s0 -; RV32IDZFHMIN-NEXT: and a0, a3, a0 -; RV32IDZFHMIN-NEXT: or a0, a2, a0 -; RV32IDZFHMIN-NEXT: and a0, a4, a0 -; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s0, 8(sp) 
# 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: and a1, a2, a3 +; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IDZFHMIN-NEXT: addi sp, sp, 32 ; RV32IDZFHMIN-NEXT: ret ; ; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat: @@ -2666,8 +2709,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: feq.s a2, s0, s0 ; CHECK32-IZHINXMIN-NEXT: neg a2, a2 ; CHECK32-IZHINXMIN-NEXT: lui a4, 524288 +; CHECK32-IZHINXMIN-NEXT: li a5, 1 ; CHECK32-IZHINXMIN-NEXT: lui a3, 524288 -; CHECK32-IZHINXMIN-NEXT: beqz s3, .LBB10_2 +; CHECK32-IZHINXMIN-NEXT: bne s3, a5, .LBB10_2 ; CHECK32-IZHINXMIN-NEXT: # %bb.1: # %start ; CHECK32-IZHINXMIN-NEXT: mv a3, a1 ; CHECK32-IZHINXMIN-NEXT: .LBB10_2: # %start @@ -2720,8 +2764,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a2, s0, s0 ; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a4, 524288 +; CHECK32-IZDINXZHINXMIN-NEXT: li a5, 1 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a3, 524288 -; CHECK32-IZDINXZHINXMIN-NEXT: beqz s3, .LBB10_2 +; CHECK32-IZDINXZHINXMIN-NEXT: bne s3, a5, .LBB10_2 ; CHECK32-IZDINXZHINXMIN-NEXT: # %bb.1: # %start ; CHECK32-IZDINXZHINXMIN-NEXT: mv a3, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_2: # %start @@ -2939,7 +2984,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x fa5, zero ; RV32IZFH-NEXT: fle.s a0, fa5, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: addi s1, a0, -1 ; RV32IZFH-NEXT: call __fixunssfdi ; RV32IZFH-NEXT: and a0, s1, a0 ; RV32IZFH-NEXT: or a0, s0, a0 @@ -2973,7 +3019,8 @@ define i64 @fcvt_lu_h_sat(half %a) 
nounwind { ; RV32IDZFH-NEXT: neg s0, a0 ; RV32IDZFH-NEXT: fmv.w.x fa5, zero ; RV32IDZFH-NEXT: fle.s a0, fa5, fa0 -; RV32IDZFH-NEXT: neg s1, a0 +; RV32IDZFH-NEXT: xori a0, a0, 1 +; RV32IDZFH-NEXT: addi s1, a0, -1 ; RV32IDZFH-NEXT: call __fixunssfdi ; RV32IDZFH-NEXT: and a0, s1, a0 ; RV32IDZFH-NEXT: or a0, s0, a0 @@ -3006,7 +3053,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: flt.s a1, a1, a0 ; RV32IZHINX-NEXT: neg s0, a1 ; RV32IZHINX-NEXT: fle.s a1, zero, a0 -; RV32IZHINX-NEXT: neg s1, a1 +; RV32IZHINX-NEXT: xori a1, a1, 1 +; RV32IZHINX-NEXT: addi s1, a1, -1 ; RV32IZHINX-NEXT: call __fixunssfdi ; RV32IZHINX-NEXT: and a0, s1, a0 ; RV32IZHINX-NEXT: or a0, s0, a0 @@ -3039,7 +3087,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0 ; RV32IZDINXZHINX-NEXT: neg s0, a1 ; RV32IZDINXZHINX-NEXT: fle.s a1, zero, a0 -; RV32IZDINXZHINX-NEXT: neg s1, a1 +; RV32IZDINXZHINX-NEXT: xori a1, a1, 1 +; RV32IZDINXZHINX-NEXT: addi s1, a1, -1 ; RV32IZDINXZHINX-NEXT: call __fixunssfdi ; RV32IZDINXZHINX-NEXT: and a0, s1, a0 ; RV32IZDINXZHINX-NEXT: or a0, s0, a0 @@ -3138,7 +3187,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: neg s0, a1 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, zero ; RV32ID-ILP32-NEXT: fle.s a1, fa5, fa4 -; RV32ID-ILP32-NEXT: neg s1, a1 +; RV32ID-ILP32-NEXT: xori a1, a1, 1 +; RV32ID-ILP32-NEXT: addi s1, a1, -1 ; RV32ID-ILP32-NEXT: call __fixunssfdi ; RV32ID-ILP32-NEXT: and a0, s1, a0 ; RV32ID-ILP32-NEXT: or a0, s0, a0 @@ -3178,7 +3228,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: xori a0, a0, 1 +; RV32ID-NEXT: addi s1, a0, -1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: or a0, s0, a0 @@ -3217,7 +3268,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZFHMIN-NEXT: neg s0, a0 ; CHECK32-IZFHMIN-NEXT: fmv.w.x fa5, zero 
; CHECK32-IZFHMIN-NEXT: fle.s a0, fa5, fa0 -; CHECK32-IZFHMIN-NEXT: neg s1, a0 +; CHECK32-IZFHMIN-NEXT: xori a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: addi s1, a0, -1 ; CHECK32-IZFHMIN-NEXT: call __fixunssfdi ; CHECK32-IZFHMIN-NEXT: and a0, s1, a0 ; CHECK32-IZFHMIN-NEXT: or a0, s0, a0 @@ -3251,7 +3303,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0 ; CHECK32-IZHINXMIN-NEXT: neg s0, a1 ; CHECK32-IZHINXMIN-NEXT: fle.s a1, zero, a0 -; CHECK32-IZHINXMIN-NEXT: neg s1, a1 +; CHECK32-IZHINXMIN-NEXT: xori a1, a1, 1 +; CHECK32-IZHINXMIN-NEXT: addi s1, a1, -1 ; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0 ; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0 @@ -3285,7 +3338,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a1, zero, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a1 +; CHECK32-IZDINXZHINXMIN-NEXT: xori a1, a1, 1 +; CHECK32-IZDINXZHINXMIN-NEXT: addi s1, a1, -1 ; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0 diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index 3f385909b0b510..647af5f5b87438 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -108,38 +108,41 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, 
s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB1_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB1_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB1_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB1_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB1_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB1_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB1_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_floor_si64: @@ -177,16 +180,17 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI1_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 
; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB1_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB1_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB1_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -194,11 +198,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB1_6 +; RV32IZHINX-NEXT: beqz a3, .LBB1_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB1_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_floor_si64: @@ -236,39 +240,42 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: 
feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB1_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB1_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB1_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB1_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB1_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_floor_si64: @@ -320,16 +327,17 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; 
RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB1_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB1_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB1_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -337,11 +345,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB1_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB1_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB1_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_floor_si64: @@ -413,7 +421,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -457,7 +465,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -499,7 +507,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) @@ 
-522,25 +530,24 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB3_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI3_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -568,23 +575,22 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, 
%hi(.LCPI3_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI3_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI3_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI3_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -622,26 +628,25 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB3_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a3) +; 
RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -682,24 +687,23 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte 
Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -820,38 +824,41 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB5_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB5_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB5_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB5_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; 
RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB5_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_ceil_si64: @@ -889,16 +896,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI5_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB5_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB5_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -906,11 +914,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB5_6 +; RV32IZHINX-NEXT: beqz a3, .LBB5_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB5_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_ceil_si64: @@ -948,39 +956,42 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 
4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB5_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB5_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB5_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; 
RV32IZFHMIN-NEXT: .LBB5_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_ceil_si64: @@ -1032,16 +1043,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB5_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB5_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1049,11 +1061,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB5_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB5_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB5_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_ceil_si64: @@ -1125,7 +1137,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1169,7 +1181,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; 
RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -1211,7 +1223,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) @@ -1234,25 +1246,24 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB7_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw 
fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1280,23 +1291,22 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI7_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI7_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -1334,26 +1344,25 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB7_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; 
RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -1394,24 +1403,23 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; 
RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -1532,38 +1540,41 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB9_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB9_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1) -; 
RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB9_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB9_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB9_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_trunc_si64: @@ -1601,16 +1612,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI9_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB9_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB9_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1618,11 +1630,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; 
RV32IZHINX-NEXT: beqz a4, .LBB9_6 +; RV32IZHINX-NEXT: beqz a3, .LBB9_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB9_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_trunc_si64: @@ -1660,39 +1672,42 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB9_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB9_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB9_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; 
RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB9_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_trunc_si64: @@ -1744,16 +1759,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB9_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB9_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1761,11 +1777,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB9_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB9_6 ; RV32IZHINXMIN-NEXT: # 
%bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB9_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_trunc_si64: @@ -1837,7 +1853,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1881,7 +1897,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -1923,7 +1939,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) @@ -1946,25 +1962,24 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB11_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; 
RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1992,23 +2007,22 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI11_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI11_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; 
RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -2046,26 +2060,25 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB11_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2106,24 +2119,23 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2244,38 +2256,41 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, 
%hi(.LCPI13_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB13_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB13_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB13_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB13_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB13_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB13_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_round_si64: @@ -2313,16 +2328,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI13_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; 
RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB13_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB13_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2330,11 +2346,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB13_6 +; RV32IZHINX-NEXT: beqz a3, .LBB13_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB13_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_round_si64: @@ -2372,39 +2388,42 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; 
RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB13_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB13_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB13_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB13_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_round_si64: @@ -2456,16 +2475,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; 
RV32IZHINXMIN-NEXT: beqz s1, .LBB13_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB13_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2473,11 +2493,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB13_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB13_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB13_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_round_si64: @@ -2549,7 +2569,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -2593,7 +2613,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -2635,7 +2655,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) @@ -2658,25 +2678,24 @@ define 
i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB15_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -2704,23 +2723,22 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI15_1) -; 
RV32IZHINX-NEXT: lw a2, %lo(.LCPI15_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI15_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -2758,26 +2776,25 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB15_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; 
RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2818,24 +2835,23 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 
4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2956,38 +2972,41 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB17_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB17_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB17_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB17_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded 
Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB17_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB17_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_roundeven_si64: @@ -3025,16 +3044,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI17_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB17_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB17_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB17_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3042,11 +3062,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB17_6 +; RV32IZHINX-NEXT: beqz a3, .LBB17_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB17_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_roundeven_si64: @@ -3084,39 +3104,42 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte 
Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB17_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB17_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB17_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 +; RV32IZFHMIN-NEXT: # 
%bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB17_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_roundeven_si64: @@ -3168,16 +3191,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB17_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB17_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB17_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3185,11 +3209,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB17_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB17_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB17_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_roundeven_si64: @@ -3261,7 +3285,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; 
RV64IZHINX-NEXT: ret ; @@ -3305,7 +3329,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -3347,7 +3371,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) @@ -3370,25 +3394,24 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB19_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; 
RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -3416,23 +3439,22 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI19_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI19_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -3470,26 +3492,25 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB19_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 
4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -3530,24 +3551,23 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; 
RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI19_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -3668,38 +3688,41 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB21_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, 
a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB21_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB21_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB21_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB21_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB21_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_rint_si64: @@ -3737,16 +3760,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB21_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB21_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3754,11 +3778,11 @@ define i64 @test_rint_si64(half %x) nounwind { 
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB21_6 +; RV32IZHINX-NEXT: beqz a3, .LBB21_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB21_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_rint_si64: @@ -3796,39 +3820,42 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB21_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 -; RV32IZFHMIN-NEXT: 
# %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB21_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB21_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_rint_si64: @@ -3880,16 +3907,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB21_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3897,11 +3925,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte 
Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB21_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB21_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_rint_si64: @@ -3973,7 +4001,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -4017,7 +4045,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -4059,7 +4087,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.rint.f16(half %x) @@ -4082,25 +4110,24 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB23_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, 
%hi(.LCPI23_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -4128,23 +4155,22 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI23_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI23_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; 
RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -4182,26 +4208,25 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB23_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; 
RV32IZFHMIN-NEXT: ret ; @@ -4242,24 +4267,23 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI23_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index cb64e24128b5e3..98c886333d69a0 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -302,56 +302,56 @@ define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; 
RV32I-NEXT: bgez a2, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB8_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -384,56 +384,56 @@ define i128 @select_abs128(i128 %x) { ; 
RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bgez a2, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw 
a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/pr84200.ll b/llvm/test/CodeGen/RISCV/pr84200.ll new file mode 100644 index 00000000000000..19a102b84ed062 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr84200.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +; The sub nuw produces poison if the input is not 0 or 1. We must insert a +; freeze before converting the sub to AND so that we don't propagate poison. +define i64 @foo(i64 %1) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: sltiu a0, a0, 2 +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: ret +entry: + %.urem.i = sub nuw i64 1, %1 + %.cmp.i = icmp ugt i64 %1, 1 + %2 = xor i64 %.urem.i, 1 + %3 = select i1 %.cmp.i, i64 0, i64 %2 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll index 71040bf2646d2c..4e958f5699adbf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll @@ -298,14 +298,14 @@ define i32 @not_shl_one_i32(i32 %x) { define i64 @not_shl_one_i64(i64 %x) { ; CHECK-LABEL: not_shl_one_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sll a1, a1, a0 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: slti a0, a0, 0 -; CHECK-NEXT: neg a2, a0 -; CHECK-NEXT: and a2, a2, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a1, a0, a1 +; CHECK-NEXT: addi a1, a0, -32 +; CHECK-NEXT: slti a1, a1, 0 +; CHECK-NEXT: neg a2, a1 +; CHECK-NEXT: li a3, 1 +; CHECK-NEXT: sll a0, a3, a0 +; CHECK-NEXT: and a2, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a0 ; CHECK-NEXT: not a0, a2 ; CHECK-NEXT: not a1, a1 ; CHECK-NEXT: 
ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll index ccda8f4e5dd059..30aba61ba47469 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -48,20 +48,20 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind { define i64 @bclr_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: bclr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a3, 1 -; RV32I-NEXT: sll a4, a3, a2 -; RV32I-NEXT: andi a2, a2, 63 -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: slti a5, a5, 0 -; RV32I-NEXT: neg a6, a5 -; RV32I-NEXT: and a4, a6, a4 -; RV32I-NEXT: sll a2, a3, a2 -; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: li a6, 1 +; RV32I-NEXT: sll a2, a6, a2 ; RV32I-NEXT: and a2, a5, a2 -; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: sll a3, a6, a3 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: and a3, a4, a3 ; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a0, a3, a0 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBSNOZBB-LABEL: bclr_i64: @@ -186,14 +186,14 @@ define i64 @bset_i64(i64 %a, i64 %b) nounwind { define signext i64 @bset_i64_zero(i64 signext %a) nounwind { ; RV32I-LABEL: bset_i64_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: sll a1, a1, a0 -; RV32I-NEXT: addi a0, a0, -32 -; RV32I-NEXT: slti a2, a0, 0 -; RV32I-NEXT: neg a0, a2 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: addi a1, a0, -32 +; RV32I-NEXT: slti a1, a1, 0 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: li a3, 1 +; RV32I-NEXT: sll a3, a3, a0 +; RV32I-NEXT: and a0, a2, a3 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBS-LABEL: bset_i64_zero: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll 
b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index 4ec7f2660b2a35..73bfc6480b4d75 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -489,7 +489,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -513,7 +513,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; RV64XTHEADBB-NEXT: lbu a0, 0(a0) ; RV64XTHEADBB-NEXT: snez a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -542,12 +542,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -569,12 +567,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; RV64XTHEADBB-NEXT: lbu a0, 0(a0) -; RV64XTHEADBB-NEXT: addi a0, a0, 1 +; RV64XTHEADBB-NEXT: addiw a0, a0, 1 ; RV64XTHEADBB-NEXT: seqz a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: and a0, a1, a0 -; RV64XTHEADBB-NEXT: slli a0, a0, 32 -; 
RV64XTHEADBB-NEXT: srli a0, a0, 32 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index 68ce66cbe8537d..7feef4dad4116a 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -444,7 +444,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -481,12 +481,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -495,11 +493,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64ZBB-LABEL: ffs_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a1, a0 -; RV64ZBB-NEXT: addi a1, a1, 1 +; RV64ZBB-NEXT: addiw a1, a1, 1 ; RV64ZBB-NEXT: seqz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) %2 = add i32 %1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index cbdabab65cc678..3ada24bd9846a1 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -89,17 +89,17 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB2_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -257,46 +257,46 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a4, -1 ; CHECK-NOV-NEXT: srli a4, a4, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 ; CHECK-NOV-NEXT: .LBB5_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB5_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 
+; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sw a4, 12(a0) -; CHECK-NOV-NEXT: sw a3, 8(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) -; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a3, 4(a0) +; CHECK-NOV-NEXT: sw a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 ; CHECK-NOV-NEXT: .LBB5_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 ; CHECK-NOV-NEXT: .LBB5_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -686,10 +686,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 24(a1) -; CHECK-NOV-NEXT: lhu s3, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 24(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu a1, 16(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -718,22 +718,22 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB8_4: # %entry ; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB8_5: # %entry -; CHECK-NOV-NEXT: sgtz a2, a3 -; CHECK-NOV-NEXT: sgtz a4, a1 -; 
CHECK-NOV-NEXT: sgtz a5, s1 -; CHECK-NOV-NEXT: sgtz a6, a0 +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: sgtz a4, s1 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: sgtz a6, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a0, a6, a0 +; CHECK-NOV-NEXT: and a3, a6, a3 ; CHECK-NOV-NEXT: negw a5, a5 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: negw a2, a2 -; CHECK-NOV-NEXT: and a2, a2, a3 -; CHECK-NOV-NEXT: sw a2, 12(s0) -; CHECK-NOV-NEXT: sw a1, 8(s0) -; CHECK-NOV-NEXT: sw a5, 4(s0) -; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: sw a4, 8(s0) +; CHECK-NOV-NEXT: sw a1, 4(s0) +; CHECK-NOV-NEXT: sw a3, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -929,17 +929,17 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB11_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB11_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -1101,46 +1101,46 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry 
-; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui a4, 16 ; CHECK-NOV-NEXT: addiw a4, a4, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 ; CHECK-NOV-NEXT: .LBB14_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB14_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sh a4, 6(a0) -; CHECK-NOV-NEXT: sh a3, 4(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) -; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 ; CHECK-NOV-NEXT: .LBB14_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s 
a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 ; CHECK-NOV-NEXT: .LBB14_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -1871,14 +1871,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 56(a1) -; CHECK-NOV-NEXT: lhu s3, 48(a1) -; CHECK-NOV-NEXT: lhu s4, 40(a1) -; CHECK-NOV-NEXT: lhu s5, 32(a1) -; CHECK-NOV-NEXT: lhu s6, 24(a1) -; CHECK-NOV-NEXT: lhu s7, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 56(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu s4, 16(a1) +; CHECK-NOV-NEXT: lhu s5, 24(a1) +; CHECK-NOV-NEXT: lhu s6, 32(a1) +; CHECK-NOV-NEXT: lhu s7, 40(a1) +; CHECK-NOV-NEXT: lhu a1, 48(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -1931,38 +1931,38 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB17_8: # %entry ; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB17_9: # %entry -; CHECK-NOV-NEXT: sgtz a3, a7 -; CHECK-NOV-NEXT: sgtz t0, a6 -; CHECK-NOV-NEXT: sgtz t1, a5 -; CHECK-NOV-NEXT: sgtz t2, a4 -; CHECK-NOV-NEXT: sgtz t3, a2 -; CHECK-NOV-NEXT: sgtz t4, a1 -; CHECK-NOV-NEXT: sgtz t5, s1 -; CHECK-NOV-NEXT: sgtz t6, a0 +; CHECK-NOV-NEXT: sgtz a3, a0 +; CHECK-NOV-NEXT: sgtz t0, s1 +; CHECK-NOV-NEXT: sgtz t1, a1 +; CHECK-NOV-NEXT: sgtz t2, a2 +; CHECK-NOV-NEXT: sgtz t3, a4 +; CHECK-NOV-NEXT: sgtz t4, a5 +; CHECK-NOV-NEXT: sgtz t5, a6 +; CHECK-NOV-NEXT: sgtz t6, a7 ; CHECK-NOV-NEXT: negw t6, t6 -; CHECK-NOV-NEXT: and a0, t6, a0 +; CHECK-NOV-NEXT: and a7, t6, a7 ; CHECK-NOV-NEXT: negw t5, t5 -; CHECK-NOV-NEXT: and t5, t5, s1 +; CHECK-NOV-NEXT: and a6, t5, a6 ; CHECK-NOV-NEXT: negw t4, t4 -; CHECK-NOV-NEXT: and a1, t4, a1 +; CHECK-NOV-NEXT: and a5, t4, a5 ; CHECK-NOV-NEXT: negw t3, t3 -; CHECK-NOV-NEXT: and a2, t3, a2 +; CHECK-NOV-NEXT: 
and a4, t3, a4 ; CHECK-NOV-NEXT: negw t2, t2 -; CHECK-NOV-NEXT: and a4, t2, a4 +; CHECK-NOV-NEXT: and a2, t2, a2 ; CHECK-NOV-NEXT: negw t1, t1 -; CHECK-NOV-NEXT: and a5, t1, a5 +; CHECK-NOV-NEXT: and a1, t1, a1 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a6, t0, a6 +; CHECK-NOV-NEXT: and t0, t0, s1 ; CHECK-NOV-NEXT: negw a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a7 -; CHECK-NOV-NEXT: sh a3, 14(s0) -; CHECK-NOV-NEXT: sh a6, 12(s0) -; CHECK-NOV-NEXT: sh a5, 10(s0) -; CHECK-NOV-NEXT: sh a4, 8(s0) -; CHECK-NOV-NEXT: sh a2, 6(s0) -; CHECK-NOV-NEXT: sh a1, 4(s0) -; CHECK-NOV-NEXT: sh t5, 2(s0) -; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: sh a0, 14(s0) +; CHECK-NOV-NEXT: sh t0, 12(s0) +; CHECK-NOV-NEXT: sh a1, 10(s0) +; CHECK-NOV-NEXT: sh a2, 8(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a7, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -2190,66 +2190,65 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB18_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB18_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB18_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: -; CHECK-NOV-NEXT: sltu 
a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB18_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB18_2 ; CHECK-NOV-NEXT: .LBB18_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB18_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB18_12 ; CHECK-NOV-NEXT: .LBB18_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB18_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB18_13 ; CHECK-NOV-NEXT: j .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB18_10 ; CHECK-NOV-NEXT: .LBB18_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB18_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; 
CHECK-NOV-NEXT: .LBB18_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB18_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB18_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2274,43 +2273,43 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB18_3 +; CHECK-V-NEXT: beqz s1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB18_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry ; CHECK-V-NEXT: neg 
a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2318,26 +2317,26 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j .LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2370,19 +2369,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixunsdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call 
__fixunsdfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2407,25 +2406,25 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2467,32 +2466,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 
+; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB20_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB20_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB20_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB20_8 ; CHECK-NOV-NEXT: .LBB20_6: -; CHECK-NOV-NEXT: snez a0, a4 +; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB20_9 ; CHECK-NOV-NEXT: .LBB20_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6 ; CHECK-NOV-NEXT: .LBB20_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB20_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2534,15 +2533,15 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -2597,66 +2596,65 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa1 +; 
CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.s fa0, fa1 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB21_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB21_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB21_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB21_4 ; CHECK-NOV-NEXT: .LBB21_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB21_5 ; CHECK-NOV-NEXT: j .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB21_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB21_2 ; CHECK-NOV-NEXT: .LBB21_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB21_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB21_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB21_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB21_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB21_12 ; CHECK-NOV-NEXT: .LBB21_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; 
CHECK-NOV-NEXT: beqz a0, .LBB21_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB21_13 ; CHECK-NOV-NEXT: j .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB21_10 ; CHECK-NOV-NEXT: .LBB21_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB21_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB21_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB21_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB21_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2681,43 +2679,43 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB21_3 +; CHECK-V-NEXT: beqz s1, .LBB21_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, 
.LBB21_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB21_5 ; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB21_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB21_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB21_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB21_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2725,26 +2723,26 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12 ; CHECK-V-NEXT: .LBB21_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB21_13 ; CHECK-V-NEXT: j .LBB21_14 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10 ; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB21_14 ; CHECK-V-NEXT: .LBB21_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB21_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; 
CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2777,19 +2775,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.s fa0, fa1 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2814,25 +2812,25 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 +; 
CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2874,32 +2872,32 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB23_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB23_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB23_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB23_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB23_8 ; CHECK-NOV-NEXT: .LBB23_6: -; CHECK-NOV-NEXT: snez a0, a4 +; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB23_9 ; CHECK-NOV-NEXT: .LBB23_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6 ; CHECK-NOV-NEXT: .LBB23_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB23_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2941,15 +2939,15 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # 
%entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -3004,8 +3002,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a1 -; CHECK-NOV-NEXT: fmv.w.x fa0, a0 +; CHECK-NOV-NEXT: mv s2, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 @@ -3013,60 +3011,58 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB24_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB24_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB24_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB24_4 ; CHECK-NOV-NEXT: .LBB24_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5 ; CHECK-NOV-NEXT: j .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB24_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB24_2 ; CHECK-NOV-NEXT: .LBB24_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv 
a0, a3 ; CHECK-NOV-NEXT: .LBB24_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB24_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB24_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB24_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB24_12 ; CHECK-NOV-NEXT: .LBB24_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB24_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB24_13 ; CHECK-NOV-NEXT: j .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB24_10 ; CHECK-NOV-NEXT: .LBB24_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB24_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB24_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB24_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB24_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3086,8 +3082,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) 
{ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a1 -; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: mv s2, a0 +; CHECK-V-NEXT: fmv.w.x fa0, a1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 @@ -3097,31 +3093,31 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB24_3 +; CHECK-V-NEXT: beqz s1, .LBB24_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB24_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB24_4 ; CHECK-V-NEXT: .LBB24_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB24_5 ; CHECK-V-NEXT: j .LBB24_6 ; CHECK-V-NEXT: .LBB24_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB24_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB24_2 ; CHECK-V-NEXT: .LBB24_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB24_6 ; CHECK-V-NEXT: .LBB24_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB24_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB24_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB24_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB24_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -3129,26 +3125,26 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB24_12 ; CHECK-V-NEXT: .LBB24_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, 
.LBB24_13 ; CHECK-V-NEXT: j .LBB24_14 ; CHECK-V-NEXT: .LBB24_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB24_10 ; CHECK-V-NEXT: .LBB24_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB24_14 ; CHECK-V-NEXT: .LBB24_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB24_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB24_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB24_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a0 -; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3179,8 +3175,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s0, a1 -; CHECK-NOV-NEXT: fmv.w.x fa0, a0 +; CHECK-NOV-NEXT: mv s0, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s1, a0 @@ -3188,13 +3184,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s2 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 
8(sp) # 8-byte Folded Reload @@ -3214,8 +3209,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s0, a1 -; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: fmv.w.x fa0, a1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s1, a0 @@ -3223,15 +3218,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s2 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s1 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a0 -; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3274,32 +3269,32 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB26_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB26_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB26_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB26_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB26_8 ; CHECK-NOV-NEXT: .LBB26_6: -; CHECK-NOV-NEXT: snez a0, a4 +; 
CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB26_9 ; CHECK-NOV-NEXT: .LBB26_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6 ; CHECK-NOV-NEXT: .LBB26_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB26_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -3335,15 +3330,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB26_2: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB26_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB26_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB26_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll index 81076e41a7cb76..122ac13cb25731 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll @@ -13,18 +13,18 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: srli a2, a0, 16 -; RV32-NEXT: srli a3, a0, 8 -; RV32-NEXT: slli a4, a0, 16 -; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: slli a3, a0, 16 +; RV32-NEXT: srli a4, a3, 24 +; RV32-NEXT: srai a3, a3, 24 ; RV32-NEXT: slli a5, a0, 24 ; RV32-NEXT: srai a5, a5, 24 ; RV32-NEXT: slli a6, a0, 8 ; RV32-NEXT: srai a6, a6, 24 ; RV32-NEXT: sgtz a6, a6 ; RV32-NEXT: sgtz a5, a5 -; RV32-NEXT: sgtz a4, a4 -; RV32-NEXT: neg a4, 
a4 -; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: sgtz a3, a3 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a0, a4, a0 @@ -39,19 +39,19 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV64-LABEL: vec3_setcc_crash: ; RV64: # %bb.0: ; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: srli a2, a0, 16 -; RV64-NEXT: srli a3, a0, 8 -; RV64-NEXT: slli a4, a0, 48 -; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: srliw a2, a0, 16 +; RV64-NEXT: slli a3, a0, 48 +; RV64-NEXT: srli a4, a3, 56 +; RV64-NEXT: srai a3, a3, 56 ; RV64-NEXT: slli a5, a0, 56 ; RV64-NEXT: srai a5, a5, 56 ; RV64-NEXT: slli a6, a0, 40 ; RV64-NEXT: srai a6, a6, 56 ; RV64-NEXT: sgtz a6, a6 ; RV64-NEXT: sgtz a5, a5 -; RV64-NEXT: sgtz a4, a4 -; RV64-NEXT: negw a4, a4 -; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: sgtz a3, a3 +; RV64-NEXT: negw a3, a3 +; RV64-NEXT: and a3, a3, a4 ; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: negw a4, a5 ; RV64-NEXT: and a0, a4, a0 diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll index de36bcdb910609..069b2febc334d2 100644 --- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll @@ -422,7 +422,8 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; RV32I-NEXT: lui a1, 1048560 ; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: sltu a1, a1, a2 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; @@ -462,7 +463,8 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; RV32I-NEXT: addi a2, a0, -128 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: sltiu a1, a2, -256 ; RV32I-NEXT: xori a1, a1, 1 ; RV32I-NEXT: and a0, a0, a1 @@ -691,7 +693,8 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; RV32I-NEXT: addi a2, a0, 128 ; 
RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: sltiu a1, a2, 256 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret From d93a126090b6e772d3b96f201cdd44ea0d6360ef Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Thu, 7 Mar 2024 15:04:42 -0800 Subject: [PATCH 111/158] [lldb] Add ability to detect darwin host linker version to xfail tests (#83941) When Apple released its new linker, it had a subtle bug that caused LLDB's TLS tests to fail. Unfortunately this means that TLS tests are not going to work on machines that have affected versions of the linker, so we should annotate the tests so that they only work when we are confident the linker has the required fix. I'm not completely satisfied with this implementation. That being said, I believe that adding suport for linker versions in general is a non-trivial change that would require far more thought. There are a few challenges involved: - LLDB's testing infra takes an argument to change the compiler, but there's no way to switch out the linker. - There's no standard way to ask a compiler what linker it will use. - There's no standard way to ask a linker what its version is. Many platforms have the same name for their linker (ld). - Some platforms automatically switch out the linker underneath you. We do this for Windows tests (where we use LLD no matter what). Given that this is affecting the tests on our CI, I think this is an acceptable solution in the interim. 
--- .../Python/lldbsuite/test/lldbplatformutil.py | 27 +++++++++++++++++++ .../API/lang/c/tls_globals/TestTlsGlobals.py | 1 + 2 files changed, 28 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py index c4d063d3cc77ef..187d16aa1baa68 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py @@ -3,6 +3,7 @@ # System modules import itertools +import json import re import subprocess import sys @@ -16,6 +17,7 @@ from . import lldbtest_config import lldbsuite.test.lldbplatform as lldbplatform from lldbsuite.test.builders import get_builder +from lldbsuite.test.lldbutil import is_exe def check_first_register_readable(test_case): @@ -333,3 +335,28 @@ def expectedCompiler(compilers): return True return False + + +# This is a helper function to determine if a specific version of Xcode's linker +# contains a TLS bug. We want to skip TLS tests if they contain this bug, but +# adding a linker/linker_version conditions to a decorator is challenging due to +# the number of ways linkers can enter the build process. 
+def xcode15LinkerBug(): + """Returns true iff a test is running on a darwin platform and the host linker is between versions 1000 and 1109.""" + darwin_platforms = lldbplatform.translate(lldbplatform.darwin_all) + if getPlatform() not in darwin_platforms: + return False + + try: + raw_version_details = subprocess.check_output( + ("xcrun", "ld", "-version_details") + ) + version_details = json.loads(raw_version_details) + version = version_details.get("version", "0") + version_tuple = tuple(int(x) for x in version.split(".")) + if (1000,) <= version_tuple <= (1109,): + return True + except: + pass + + return False diff --git a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py index dfe29b451df0a6..2bffd2eea123a6 100644 --- a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py +++ b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py @@ -40,6 +40,7 @@ def setUp(self): @skipIfWindows @skipIf(oslist=["linux"], archs=["arm", "aarch64"]) @skipIf(oslist=no_match([lldbplatformutil.getDarwinOSTriples(), "linux"])) + @expectedFailureIf(lldbplatformutil.xcode15LinkerBug()) def test(self): """Test thread-local storage.""" self.build() From 1c01651bda46426f497c2948fe52cc25acf0e76d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 7 Mar 2024 15:09:17 -0800 Subject: [PATCH 112/158] [libc][docs] add page linking to talks (#84393) --- libc/docs/index.rst | 1 + libc/docs/talks.rst | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 libc/docs/talks.rst diff --git a/libc/docs/index.rst b/libc/docs/index.rst index a50eb080c9ee49..370fcd843974e8 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -78,6 +78,7 @@ stages there is no ABI stability in any form. dev/index.rst porting contributing + talks .. 
toctree:: :hidden: diff --git a/libc/docs/talks.rst b/libc/docs/talks.rst new file mode 100644 index 00000000000000..6daae9f1e7b69a --- /dev/null +++ b/libc/docs/talks.rst @@ -0,0 +1,29 @@ +===== +Talks +===== +---- +2023 +---- +* Math functions in LLVM libc or yet another correctly rounded libm - Tue Ly + + * `video `__ +* The LLVM C Library for GPUs - Joseph Huber + + * `slides `__ + * `video `__ + +---- +2022 +---- +* Using LLVM's libc - Sivachandra Reddy, Michael Jones, Tue Ly + + * `slides `__ + * `video `__ +* Using modern CPU instructions to improve LLVM's libc math library - Tue Ly + + * `slides `__ + * `video `__ +* Approximating at Scale: How strto float in LLVM’s libc is faster - Michael Jones + + * `slides `__ + * `video `__ From 10edabbcf331fdd53d27c5195de1b692a0063721 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Fri, 8 Mar 2024 02:10:53 +0300 Subject: [PATCH 113/158] [X86][GlobalISel] Enable G_SDIV/G_UDIV/G_SREM/G_UREM (#81615) * Create a libcall for s64 type for 32 bit targets. * Fix a bug in REM selection: SUBREG_TO_REG is not intended to produce a value from super registers. * Replace selector tests by end-to-end tests. Other passes check the selected MIR better. 
--- .../X86/GISel/X86InstructionSelector.cpp | 9 +- .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 1 + .../CodeGen/X86/GlobalISel/legalize-sdiv.mir | 127 ++++++++ .../CodeGen/X86/GlobalISel/legalize-srem.mir | 127 ++++++++ .../CodeGen/X86/GlobalISel/legalize-udiv.mir | 127 ++++++++ .../CodeGen/X86/GlobalISel/legalize-urem.mir | 127 ++++++++ .../X86/GlobalISel/x86-legalize-sdiv.mir | 114 -------- .../X86/GlobalISel/x86-legalize-srem.mir | 211 -------------- .../X86/GlobalISel/x86-legalize-udiv.mir | 195 ------------- .../X86/GlobalISel/x86-legalize-urem.mir | 211 -------------- .../X86/GlobalISel/x86-select-sdiv.mir | 130 --------- .../X86/GlobalISel/x86-select-srem.mir | 213 -------------- .../X86/GlobalISel/x86-select-udiv.mir | 215 -------------- .../X86/GlobalISel/x86-select-urem.mir | 215 -------------- .../X86/GlobalISel/x86_64-legalize-sdiv.mir | 145 ---------- .../X86/GlobalISel/x86_64-legalize-srem.mir | 253 ---------------- .../X86/GlobalISel/x86_64-legalize-udiv.mir | 253 ---------------- .../X86/GlobalISel/x86_64-legalize-urem.mir | 253 ---------------- .../X86/GlobalISel/x86_64-select-sdiv.mir | 164 ----------- .../X86/GlobalISel/x86_64-select-srem.mir | 270 ----------------- .../X86/GlobalISel/x86_64-select-udiv.mir | 267 ----------------- .../X86/GlobalISel/x86_64-select-urem.mir | 273 ------------------ llvm/test/CodeGen/X86/isel-sdiv.ll | 116 ++++++++ llvm/test/CodeGen/X86/isel-srem.ll | 150 ++++++++++ llvm/test/CodeGen/X86/isel-udiv.ll | 116 ++++++++ llvm/test/CodeGen/X86/isel-urem.ll | 150 ++++++++++ 26 files changed, 1044 insertions(+), 3388 deletions(-) create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir delete mode 100644 
llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir create mode 100644 llvm/test/CodeGen/X86/isel-sdiv.ll create mode 100644 llvm/test/CodeGen/X86/isel-srem.ll create mode 100644 llvm/test/CodeGen/X86/isel-udiv.ll create mode 100644 llvm/test/CodeGen/X86/isel-urem.ll diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 26932ba2c8e242..8e0f61a855661b 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1778,12 +1778,9 @@ bool X86InstructionSelector::selectMulDivRem(MachineInstr &I, .addImm(8); // Now reference the 8-bit subreg of the result. 
- BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(TargetOpcode::SUBREG_TO_REG)) - .addDef(DstReg) - .addImm(0) - .addReg(ResultSuperReg) - .addImm(X86::sub_8bit); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), + DstReg) + .addReg(ResultSuperReg, 0, X86::sub_8bit); } else { BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg) diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 2e33adaed7a847..06389842ebb1ed 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -213,6 +213,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, return typeInSet(0, {s8, s16, s32})(Query) || (Is64Bit && typeInSet(0, {s64})(Query)); }) + .libcallFor({s64}) .clampScalar(0, s8, sMaxScalar); // integer shifts diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir new file mode 100644 index 00000000000000..95c69209a2c354 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... 
+--- +name: test_sdiv_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[SDIV]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_SDIV %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... +--- +name: test_sdiv_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[SDIV]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_SDIV %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... 
+--- +name: test_sdiv_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[SDIV]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_SDIV %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... +--- +name: test_sdiv_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_sdiv_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_sdiv_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__divdi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_SDIV %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir new file mode 100644 index 00000000000000..ab7d89de5aa0d4 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... 
+--- +name: test_srem_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[SREM]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_SREM %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... +--- +name: test_srem_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[SREM]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_SREM %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... 
+--- +name: test_srem_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[SREM]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_SREM %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... +--- +name: test_srem_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_srem_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SREM]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_srem_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__moddi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_SREM %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir new file mode 100644 index 00000000000000..233fada9c6c892 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... 
+--- +name: test_udiv_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[UDIV]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_UDIV %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... +--- +name: test_udiv_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[UDIV]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_UDIV %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... 
+--- +name: test_udiv_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[UDIV]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_UDIV %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... +--- +name: test_udiv_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_udiv_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UDIV]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_udiv_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__udivdi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_UDIV %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir new file mode 100644 index 00000000000000..85f6063dbd1e70 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... 
+--- +name: test_urem_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[UREM]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_UREM %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... +--- +name: test_urem_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[UREM]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_UREM %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... 
+--- +name: test_urem_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[UREM]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_UREM %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... +--- +name: test_urem_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_urem_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UREM]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_urem_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__umoddi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_UREM %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir deleted file mode 100644 index 80382db942722c..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir +++ /dev/null @@ -1,114 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_sdiv_i8 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_sdiv_i16 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_sdiv_i32 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir deleted file mode 100644 index 965bf635d6feb8..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir +++ /dev/null @@ -1,211 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $al = COPY [[SREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:_(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $ax = COPY [[SREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(p0) = G_FRAME_INDEX 
%fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:_(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = 
G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $eax = COPY [[SREM]](s32) - ; CHECK: RET 0, implicit $eax - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:_(s32) = G_SREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir deleted file mode 100644 index 85c9b6d9e86bfd..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir +++ /dev/null @@ -1,195 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir deleted file mode 100644 index b6496216ac56da..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir +++ /dev/null @@ -1,211 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $al = COPY [[UREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:_(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $ax = COPY [[UREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(p0) = G_FRAME_INDEX 
%fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:_(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = 
G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $eax = COPY [[UREM]](s32) - ; CHECK: RET 0, implicit $eax - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:_(s32) = G_UREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir deleted file mode 100644 index 653d867492dc11..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir +++ /dev/null @@ -1,130 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32_abcd = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:gr8_abcd_l = COPY [[COPY1]].sub_8bit - ; CHECK: [[COPY3:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY4:%[0-9]+]]:gr32_abcd = COPY [[COPY3]] - ; CHECK: [[COPY5:%[0-9]+]]:gr8_abcd_l = COPY [[COPY4]].sub_8bit - ; CHECK: $ax = MOVSX16rr8 [[COPY2]] - ; CHECK: IDIV8r [[COPY5]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY6:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY6]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_sdiv_i16 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_sdiv_i32 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir deleted file mode 100644 index a7f5badcdef061..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir +++ /dev/null @@ -1,213 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVSX16rr8 [[MOV8rm]] - ; CHECK: IDIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $dx - ; 
CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) - ; 
CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:gpr(s32) = G_SREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir deleted file mode 100644 index 1a960f9ad9e2c6..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir +++ /dev/null @@ -1,215 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] - ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX 
%fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant 
load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, 
callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1, align 16) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 4) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - %4:gpr(s32) = G_UDIV %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir deleted file mode 100644 index 23d2892ad91104..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir +++ /dev/null @@ -1,215 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] - ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx 
- ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $dx - ; CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant 
load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:gpr(s32) = G_UREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir deleted file mode 100644 index faccc3750c806e..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir +++ /dev/null @@ -1,145 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) { - %res = sdiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_sdiv_i16 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_sdiv_i32 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... ---- -name: test_sdiv_i64 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_sdiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[SDIV]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_SDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir deleted file mode 100644 index f02442f2b8501e..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_srem_i64(i64 %arg1, i64 %arg2) { - %res = srem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SREM]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_srem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_srem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[SREM]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_SREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir deleted file mode 100644 index 35073e2bcb1b1c..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) { - %res = udiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_udiv_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_udiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[UDIV]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_UDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir deleted file mode 100644 index c0ca5ae74fc31f..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_urem_i64(i64 %arg1, i64 %arg2) { - %res = urem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UREM]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_urem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_urem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[UREM]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_UREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir deleted file mode 100644 index d3a1608be52a1b..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir +++ /dev/null @@ -1,164 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) { - %res = sdiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK: $ax = MOVSX16rr8 [[COPY1]] - ; CHECK: IDIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY4:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY4]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_sdiv_i16 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_sdiv_i32 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_sdiv_i64 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_sdiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK: $rax = COPY [[COPY]] - ; CHECK: CQO implicit-def $rax, implicit-def $rdx, implicit $rax - ; CHECK: IDIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY $rax - ; CHECK: $rax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_SDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir deleted file mode 100644 index 0988883145bcdf..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir +++ /dev/null @@ -1,270 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_srem_i64(i64 %arg1, i64 %arg2) { - %res = srem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK-NEXT: $ax = MOVSX16rr8 [[COPY1]] - ; CHECK-NEXT: IDIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK-NEXT: [[SHR16ri:%[0-9]+]]:gr16 = SHR16ri [[COPY4]], 8, implicit-def $eflags - ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr8 = SUBREG_TO_REG 0, [[SHR16ri]], %subreg.sub_8bit - ; CHECK-NEXT: $al = COPY [[SUBREG_TO_REG]] - ; CHECK-NEXT: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK-NEXT: $ax = COPY [[COPY1]] - ; CHECK-NEXT: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK-NEXT: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $dx - ; CHECK-NEXT: $ax = COPY [[COPY4]] - ; CHECK-NEXT: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: $eax = COPY [[COPY]] - ; CHECK-NEXT: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK-NEXT: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: $eax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_srem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_srem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: $rax = COPY [[COPY]] - ; CHECK-NEXT: CQO implicit-def $rax, implicit-def $rdx, implicit $rax - ; CHECK-NEXT: IDIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx - ; CHECK-NEXT: $rax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_SREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir deleted file mode 100644 index 71c03fd6e28fd1..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir +++ /dev/null @@ -1,267 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) { - %res = udiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK: $ax = MOVZX16rr8 [[COPY1]] - ; CHECK: DIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY4:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY4]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_udiv_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_udiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK: $rax = COPY [[COPY]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $rdx = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit - ; CHECK: DIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY $rax - ; CHECK: $rax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_UDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir deleted file mode 100644 index 657cf499949734..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir +++ /dev/null @@ -1,273 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_urem_i64(i64 %arg1, i64 %arg2) { - %res = urem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK-NEXT: $ax = MOVZX16rr8 [[COPY1]] - ; CHECK-NEXT: DIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK-NEXT: [[SHR16ri:%[0-9]+]]:gr16 = SHR16ri [[COPY4]], 8, implicit-def $eflags - ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr8 = SUBREG_TO_REG 0, [[SHR16ri]], %subreg.sub_8bit - ; CHECK-NEXT: $al = COPY [[SUBREG_TO_REG]] - ; CHECK-NEXT: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK-NEXT: $ax = COPY [[COPY1]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK-NEXT: DIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $dx - ; CHECK-NEXT: $ax = COPY [[COPY4]] - ; CHECK-NEXT: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: $eax = COPY [[COPY]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $edx = COPY [[MOV32r0_]] - ; CHECK-NEXT: DIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: $eax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_UREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_urem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_urem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: $rax = COPY [[COPY]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $rdx = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit - ; CHECK-NEXT: DIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx - ; CHECK-NEXT: $rax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_UREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/isel-sdiv.ll b/llvm/test/CodeGen/X86/isel-sdiv.ll new file mode 100644 index 00000000000000..6a6b2da8dc2f8d --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-sdiv.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) nounwind { +; X64-LABEL: test_sdiv_i8: +; X64: # %bb.0: +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: idivb %sil +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i8: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: idivb {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_sdiv_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: cbtw +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: idivb %cl +; GISEL-X86-NEXT: retl + %ret = sdiv i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_sdiv_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: cwtd +; X64-NEXT: idivw %si +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cwtd +; DAG-X86-NEXT: idivw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: 
retl +; +; GISEL-X86-LABEL: test_sdiv_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: cwtd +; GISEL-X86-NEXT: idivw %cx +; GISEL-X86-NEXT: retl + %ret = sdiv i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_sdiv_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cltd +; X64-NEXT: idivl %esi +; X64-NEXT: retq +; +; X86-LABEL: test_sdiv_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cltd +; X86-NEXT: idivl {{[0-9]+}}(%esp) +; X86-NEXT: retl + %ret = sdiv i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_sdiv_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cqto +; X64-NEXT: idivq %rsi +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __divdi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_sdiv_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __divdi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = sdiv i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-srem.ll 
b/llvm/test/CodeGen/X86/isel-srem.ll new file mode 100644 index 00000000000000..56716e10a9d996 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-srem.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,FAST-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,GISEL-X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,SDAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,FAST-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_srem_i8(i8 %arg1, i8 %arg2) nounwind { +; SDAG-X64-LABEL: test_srem_i8: +; SDAG-X64: # %bb.0: +; SDAG-X64-NEXT: movsbl %dil, %eax +; SDAG-X64-NEXT: idivb %sil +; SDAG-X64-NEXT: movsbl %ah, %eax +; SDAG-X64-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X64-NEXT: retq +; +; FAST-X64-LABEL: test_srem_i8: +; FAST-X64: # %bb.0: +; FAST-X64-NEXT: movsbl %dil, %eax +; FAST-X64-NEXT: idivb %sil +; FAST-X64-NEXT: shrw $8, %ax +; FAST-X64-NEXT: # kill: def $al killed $al killed $ax +; FAST-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_srem_i8: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movsbl %dil, %eax +; GISEL-X64-NEXT: idivb %sil +; GISEL-X64-NEXT: shrw $8, %ax +; GISEL-X64-NEXT: # kill: def $al killed $al killed $ax +; GISEL-X64-NEXT: retq +; +; SDAG-X86-LABEL: test_srem_i8: +; SDAG-X86: # %bb.0: +; SDAG-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SDAG-X86-NEXT: idivb {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movsbl %ah, %eax +; SDAG-X86-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X86-NEXT: retl +; +; FAST-X86-LABEL: 
test_srem_i8: +; FAST-X86: # %bb.0: +; FAST-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; FAST-X86-NEXT: idivb {{[0-9]+}}(%esp) +; FAST-X86-NEXT: movb %ah, %al +; FAST-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: cbtw +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: idivb %cl +; GISEL-X86-NEXT: movb %ah, %al +; GISEL-X86-NEXT: retl + %ret = srem i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_srem_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_srem_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: cwtd +; X64-NEXT: idivw %si +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_srem_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cwtd +; DAG-X86-NEXT: idivw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: movl %edx, %eax +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: cwtd +; GISEL-X86-NEXT: idivw %cx +; GISEL-X86-NEXT: movl %edx, %eax +; GISEL-X86-NEXT: retl + %ret = srem i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_srem_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_srem_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cltd +; X64-NEXT: idivl %esi +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: test_srem_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cltd +; X86-NEXT: idivl {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: retl + %ret = srem i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_srem_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_srem_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cqto +; X64-NEXT: 
idivq %rsi +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_srem_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __moddi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __moddi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = srem i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-udiv.ll b/llvm/test/CodeGen/X86/isel-udiv.ll new file mode 100644 index 00000000000000..b56b8b112fe471 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-udiv.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s 
--check-prefixes=X86,GISEL-X86 + +define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) nounwind { +; X64-LABEL: test_udiv_i8: +; X64: # %bb.0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: divb %sil +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i8: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: divb {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movzbl %al, %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: divb %cl +; GISEL-X86-NEXT: retl + %ret = udiv i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_udiv_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divw %si +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: xorl %edx, %edx +; DAG-X86-NEXT: divw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: divw %cx +; GISEL-X86-NEXT: retl + %ret = udiv i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_udiv_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: retq +; +; X86-LABEL: test_udiv_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: divl {{[0-9]+}}(%esp) +; X86-NEXT: retl + %ret = udiv i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_udiv_i64: +; X64: # %bb.0: 
+; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divq %rsi +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __udivdi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __udivdi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = udiv i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-urem.ll b/llvm/test/CodeGen/X86/isel-urem.ll new file mode 100644 index 00000000000000..50b9c1250ff875 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-urem.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,FAST-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,GISEL-X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,SDAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,FAST-X86 +; RUN: llc < %s -global-isel 
-global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_urem_i8(i8 %arg1, i8 %arg2) nounwind { +; SDAG-X64-LABEL: test_urem_i8: +; SDAG-X64: # %bb.0: +; SDAG-X64-NEXT: movzbl %dil, %eax +; SDAG-X64-NEXT: divb %sil +; SDAG-X64-NEXT: movzbl %ah, %eax +; SDAG-X64-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X64-NEXT: retq +; +; FAST-X64-LABEL: test_urem_i8: +; FAST-X64: # %bb.0: +; FAST-X64-NEXT: movzbl %dil, %eax +; FAST-X64-NEXT: divb %sil +; FAST-X64-NEXT: shrw $8, %ax +; FAST-X64-NEXT: # kill: def $al killed $al killed $ax +; FAST-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_urem_i8: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movzbl %dil, %eax +; GISEL-X64-NEXT: divb %sil +; GISEL-X64-NEXT: shrw $8, %ax +; GISEL-X64-NEXT: # kill: def $al killed $al killed $ax +; GISEL-X64-NEXT: retq +; +; SDAG-X86-LABEL: test_urem_i8: +; SDAG-X86: # %bb.0: +; SDAG-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SDAG-X86-NEXT: divb {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzbl %ah, %eax +; SDAG-X86-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X86-NEXT: retl +; +; FAST-X86-LABEL: test_urem_i8: +; FAST-X86: # %bb.0: +; FAST-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FAST-X86-NEXT: divb {{[0-9]+}}(%esp) +; FAST-X86-NEXT: movb %ah, %al +; FAST-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_urem_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movzbl %al, %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: divb %cl +; GISEL-X86-NEXT: movb %ah, %al +; GISEL-X86-NEXT: retl + %ret = urem i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_urem_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_urem_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divw %si +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_urem_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: 
movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: xorl %edx, %edx +; DAG-X86-NEXT: divw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: movl %edx, %eax +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_urem_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: divw %cx +; GISEL-X86-NEXT: movl %edx, %eax +; GISEL-X86-NEXT: retl + %ret = urem i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_urem_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_urem_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: test_urem_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: divl {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: retl + %ret = urem i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_urem_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_urem_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divq %rsi +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_urem_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __umoddi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_urem_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; 
GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __umoddi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = urem i64 %arg1, %arg2 + ret i64 %ret +} From 5d33f7176b002da244823ca0e6b524777890dd9d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 7 Mar 2024 15:19:27 -0800 Subject: [PATCH 114/158] Fix build: llvm::Error needs to be moved for implicit conversion to Expected. I don't know why the premerge setup didn't fail on this, but many builbots are broken right now. --- llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index d5682fcaa28b79..810a38f4a6acb8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -490,7 +490,7 @@ class LLJITBuilderSetters { if (impl().NotifyCreated) if (Error Err = impl().NotifyCreated(*J)) - return Err; + return std::move(Err); return std::move(J); } From 5669660f37ef1800f4a7852577364b024d75e3d8 Mon Sep 17 00:00:00 2001 From: Chao Chen <116223022+chencha3@users.noreply.github.com> Date: Thu, 7 Mar 2024 17:25:59 -0600 Subject: [PATCH 115/158] [MLIR] XeGPU dialect for Intel GPU - core definitions and base classes (#78483) This PR follows our previous [RFC ](https://discourse.llvm.org/t/rfc-add-xegpu-dialect-for-intel-gpus/75723) to add XeGPU dialect definition for Intel GPUs. It contains dialect, type, attributes and operators definitions, as well as testcases for semantic checks. The lowering and optimization passes will be issued with separated passes. 
--------- Co-authored-by: Mehdi Amini --- mlir/include/mlir/Dialect/CMakeLists.txt | 1 + .../include/mlir/Dialect/XeGPU/CMakeLists.txt | 1 + .../mlir/Dialect/XeGPU/IR/CMakeLists.txt | 14 +++++++ mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h | 29 ++++++++++++++ mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td | 14 +++++++ .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 20 ++++++++++ .../mlir/Dialect/XeGPU/IR/XeGPUDialect.td | 30 ++++++++++++++ .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 26 +++++++++++++ .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 33 ++++++++++++++++ mlir/include/mlir/InitAllDialects.h | 4 +- mlir/lib/Dialect/CMakeLists.txt | 1 + mlir/lib/Dialect/XeGPU/CMakeLists.txt | 1 + mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 15 +++++++ mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 39 +++++++++++++++++++ mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 23 +++++++++++ 15 files changed, 250 insertions(+), 1 deletion(-) create mode 100644 mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td create mode 100644 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td create mode 100644 mlir/lib/Dialect/XeGPU/CMakeLists.txt create mode 100644 mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt create mode 100644 mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp create mode 100644 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt index 9788e24e4a1d91..2da79011fa26a3 100644 --- a/mlir/include/mlir/Dialect/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/CMakeLists.txt @@ -40,3 +40,4 @@ add_subdirectory(UB) add_subdirectory(Utils) 
add_subdirectory(Vector) add_subdirectory(X86Vector) +add_subdirectory(XeGPU) diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt new file mode 100644 index 00000000000000..f33061b2d87cff --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt new file mode 100644 index 00000000000000..f1740e9ed929a6 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt @@ -0,0 +1,14 @@ +add_mlir_dialect(XeGPU xegpu) +add_mlir_doc(XeGPU XeGPU Dialects/ -gen-dialect-doc -dialect=xegpu) + +set(LLVM_TARGET_DEFINITIONS XeGPU.td) +mlir_tablegen(XeGPUAttrs.h.inc -gen-attrdef-decls) +mlir_tablegen(XeGPUAttrs.cpp.inc -gen-attrdef-defs) +add_public_tablegen_target(MLIRXeGPUAttrsIncGen) +add_dependencies(mlir-headers MLIRXeGPUAttrsIncGen) + +set(LLVM_TARGET_DEFINITIONS XeGPU.td) +mlir_tablegen(XeGPUEnums.h.inc -gen-enum-decls) +mlir_tablegen(XeGPUEnums.cpp.inc -gen-enum-defs) +add_public_tablegen_target(MLIRXeGPUEnumsIncGen) +add_dependencies(mlir-headers MLIRXeGPUEnumsIncGen) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h new file mode 100644 index 00000000000000..7aaa4ecc7ee77a --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h @@ -0,0 +1,29 @@ +//===- XeGPU.h - MLIR dialect for XeGPU -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H +#define MLIR_DIALECT_XEGPU_IR_XEGPU_H + +#include + +namespace mlir { +namespace xegpu { +// placeholder +} // namespace xegpu +} // namespace mlir + +#include +#include +#define GET_ATTRDEF_CLASSES +#include +#define GET_TYPEDEF_CLASSES +#include +#define GET_OP_CLASSES +#include + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_H diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td new file mode 100644 index 00000000000000..232e962870716c --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td @@ -0,0 +1,14 @@ +//===- XeGPU.td - XeGPU dialect definition ------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPU_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUOps.td" + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td new file mode 100644 index 00000000000000..bb325c272e3324 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -0,0 +1,20 @@ +//===- XeGPUAttrs.td - XeGPU dialect attributes definition --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" + +class XeGPUAttr traits = [], + string baseCppClass = "::mlir::Attribute"> + : AttrDef { + let mnemonic = attrMnemonic; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td new file mode 100644 index 00000000000000..3851275ad30a0a --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td @@ -0,0 +1,30 @@ +//===- XeGPUDialect.td - XeGPU dialect definition -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD + +include "mlir/IR/OpBase.td" + +def XeGPU_Dialect : Dialect { + let name = "xegpu"; + let cppNamespace = "::mlir::xegpu"; + let summary = "The XeGPU dialect that models Intel GPU's ISA"; + let description = [{ + The XeGPU dialect models Intel Xe ISA semantics but works at vector and + TensorDesc data type. It provides 1:1 mappings to match Xe instructions + like DPAS and 2D block load. The matrix size being processed at this level + exactly matches the hardware instructions or the intrinsic supported by + the lower-level GPU compiler. 
+ }]; + + // let useDefaultTypePrinterParser = true; + // let useDefaultAttributePrinterParser = true; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td new file mode 100644 index 00000000000000..5825ef9195b03f --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -0,0 +1,26 @@ +//===- XeGPUOps.td - XeGPU dialect operations definition ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" +include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td" + + +// Base class for dialect operations. This operation inherits from the base +// `Op` class in OpBase.td, and provides: +// * The parent dialect of the operation. +// * The mnemonic for the operation, or the name without the dialect prefix. +// * A list of traits for the operation. +class XeGPU_Op traits = []>: + Op; + + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td new file mode 100644 index 00000000000000..1d75bb4e2906fe --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -0,0 +1,33 @@ +//===- XeGPUTypes.td - XeGPU dialect types definition -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD + +include "mlir/IR/BuiltinTypes.td" +include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" + +def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>; +def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>; +def XeGPU_ScalarType: AnyTypeOf<[XeGPU_IntType, XeGPU_FloatType]>; +def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64, UI32, I64, I32]>; +def XeGPU_DpasOpType: VectorOfRankAndType<[2, 3], [XeGPU_ScalarType]>; +def XeGPU_OffsetType: VectorOfRankAndType<[1], [Index]>; +def XeGPU_MaskType: AnyTypeOf<[VectorOfRankAndType<[1,2], [I1]>, I1]>; +def XeGPU_ValueType: AnyTypeOf<[VectorOfRankAndType<[1,2,3,4], [XeGPU_ScalarType]>, XeGPU_ScalarType]>; +def XeGPU_Vector2DType: VectorOfRankAndType<[2], [XeGPU_ScalarType]>; + +// common base class for types in XeGPU dialect +class XeGPUTypeDef traits = [], + string baseCppClass = "::mlir::Type"> + : TypeDef { + let mnemonic = typeMnemonic; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index e508d51205f347..838bd03622a626 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -89,6 +89,7 @@ #include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/Vector/Transforms/SubsetOpInterfaceImpl.h" #include "mlir/Dialect/X86Vector/X86VectorDialect.h" +#include "mlir/Dialect/XeGPU/IR/XeGPU.h" #include "mlir/IR/Dialect.h" #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Target/LLVM/NVVM/Target.h" @@ -141,7 +142,8 @@ inline void registerAllDialects(DialectRegistry ®istry) { 
transform::TransformDialect, ub::UBDialect, vector::VectorDialect, - x86vector::X86VectorDialect>(); + x86vector::X86VectorDialect, + xegpu::XeGPUDialect>(); // clang-format on // Register all external models. diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt index c72107939cf42b..b1ba5a3bc8817d 100644 --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(UB) add_subdirectory(Utils) add_subdirectory(Vector) add_subdirectory(X86Vector) +add_subdirectory(XeGPU) set(LLVM_OPTIONAL_SOURCES Traits.cpp diff --git a/mlir/lib/Dialect/XeGPU/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/CMakeLists.txt new file mode 100644 index 00000000000000..f33061b2d87cff --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt new file mode 100644 index 00000000000000..2e99f39ed86d2e --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt @@ -0,0 +1,15 @@ +add_mlir_dialect_library(MLIRXeGPUDialect + XeGPUDialect.cpp + XeGPUOps.cpp + + ADDITIONAL_HEADER_DIRS + ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/XeGPU + + DEPENDS + MLIRXeGPUIncGen + MLIRXeGPUAttrsIncGen + MLIRXeGPUEnumsIncGen + + LINK_LIBS PUBLIC + MLIRIR +) diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp new file mode 100644 index 00000000000000..4f839ee773476b --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -0,0 +1,39 @@ +//===- XeGPUDialect.cpp - MLIR XeGPU dialect implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace mlir { +namespace xegpu { + +void XeGPUDialect::initialize() { + addTypes< +#define GET_TYPEDEF_LIST +#include + >(); + addOperations< +#define GET_OP_LIST +#include + >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include + >(); +} + +// this file is for position occupation, +// we will add functions in following PRs. + +} // namespace xegpu +} // namespace mlir + +#include +#define GET_ATTRDEF_CLASSES +#include +#define GET_TYPEDEF_CLASSES +#include diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp new file mode 100644 index 00000000000000..0e89ac4df6ef28 --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -0,0 +1,23 @@ +//===- XeGPUOps.cpp - MLIR XeGPU ops implementation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#define DEBUG_TYPE "xegpu" + +namespace mlir { +namespace xegpu { +// this file is for position occupation, +// we will add functions in following PRs. + +} // namespace xegpu +} // namespace mlir + +#include +#define GET_OP_CLASSES +#include From a9b0d7590b9e08151243b97aa75366e988e0d6c8 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 7 Mar 2024 15:29:38 -0800 Subject: [PATCH 116/158] [BOLT] Properly propagate Cursor errors (#84378) Handle out-of-bounds reading errors correctly in LinuxKernelRewriter. 
--- bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 30 ++++++++++++++++-------- bolt/test/X86/linux-alt-instruction.s | 5 ++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index ecfbea3cb51185..331a61e7c3c2cd 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -500,7 +500,8 @@ Error LinuxKernelRewriter::readORCTables() { // Consume the status of the cursor. if (!IPCursor) return createStringError(errc::executable_format_error, - "out of bounds while reading ORC IP table"); + "out of bounds while reading ORC IP table: %s", + toString(IPCursor.takeError()).c_str()); if (IP < PrevIP && opts::Verbosity) BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) @@ -522,7 +523,8 @@ Error LinuxKernelRewriter::readORCTables() { // Consume the status of the cursor. if (!ORCCursor) return createStringError(errc::executable_format_error, - "out of bounds while reading ORC"); + "out of bounds while reading ORC: %s", + toString(ORCCursor.takeError()).c_str()); if (Entry.ORC == NullORC) continue; @@ -843,7 +845,8 @@ Error LinuxKernelRewriter::readStaticCalls() { // Consume the status of the cursor. if (!Cursor) return createStringError(errc::executable_format_error, - "out of bounds while reading static calls"); + "out of bounds while reading static calls: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -954,8 +957,10 @@ Error LinuxKernelRewriter::readExceptionTable() { // Consume the status of the cursor. 
if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while reading exception table"); + return createStringError( + errc::executable_format_error, + "out of bounds while reading exception table: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1061,8 +1066,10 @@ Error LinuxKernelRewriter::readParaInstructions() { const uint8_t Len = DE.getU8(Cursor); if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while reading .parainstructions"); + return createStringError( + errc::executable_format_error, + "out of bounds while reading .parainstructions: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1129,7 +1136,8 @@ Error LinuxKernelRewriter::readBugTable() { if (!Cursor) return createStringError(errc::executable_format_error, - "out of bounds while reading __bug_table"); + "out of bounds while reading __bug_table: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1196,8 +1204,10 @@ Error LinuxKernelRewriter::readAltInstructions() { const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while reading .altinstructions"); + return createStringError( + errc::executable_format_error, + "out of bounds while reading .altinstructions: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s index 96e77545b654bc..5dcc6fe3ab0c81 100644 --- a/bolt/test/X86/linux-alt-instruction.s +++ b/bolt/test/X86/linux-alt-instruction.s @@ -27,6 +27,11 @@ # RUN: llvm-bolt %t.exe --print-normalized --keep-nops \ # RUN: --alt-inst-feature-size=4 -o %t.out | FileCheck %s +## Check that out-of-bounds read is handled properly. 
+ +# RUN: not llvm-bolt %t.exe --print-normalized --keep-nops \ +# RUN: --alt-inst-feature-size=2 -o %t.out + # CHECK: BOLT-INFO: Linux kernel binary detected # CHECK: BOLT-INFO: parsed 2 alternative instruction entries From 50ae8a2a38b618d76193bed04b1d7df6890d5c8a Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Thu, 7 Mar 2024 15:32:44 -0800 Subject: [PATCH 117/158] [InstallAPI] Collect global functions (#83952) * Include whether functions are inlinable as they impact whether to add them into the tbd file and for future verification. * Fix how clang arguments got passed along, previously spacing was passed along to CC1 causing search path inputs to look non-existent. --- clang/include/clang/InstallAPI/Frontend.h | 5 +- clang/include/clang/InstallAPI/Visitor.h | 3 + clang/lib/InstallAPI/Frontend.cpp | 5 +- clang/lib/InstallAPI/Visitor.cpp | 78 +++++++++++++++++++++++ clang/test/InstallAPI/functions.test | 78 +++++++++++++++++++++++ clang/tools/clang-installapi/Options.cpp | 4 +- llvm/include/llvm/TextAPI/Record.h | 6 +- llvm/include/llvm/TextAPI/RecordsSlice.h | 5 +- llvm/lib/TextAPI/RecordsSlice.cpp | 6 +- llvm/unittests/TextAPI/RecordTests.cpp | 3 +- 10 files changed, 182 insertions(+), 11 deletions(-) create mode 100644 clang/test/InstallAPI/functions.test diff --git a/clang/include/clang/InstallAPI/Frontend.h b/clang/include/clang/InstallAPI/Frontend.h index 8774321e990c13..cbc2b159ebd17a 100644 --- a/clang/include/clang/InstallAPI/Frontend.h +++ b/clang/include/clang/InstallAPI/Frontend.h @@ -50,12 +50,15 @@ class FrontendRecordsSlice : public llvm::MachO::RecordsSlice { /// \param D The pointer to the declaration from traversing AST. /// \param Access The intended access level of symbol. /// \param Flags The flags that describe attributes of the symbol. + /// \param Inlined Whether declaration is inlined, only applicable to + /// functions. /// \return The non-owning pointer to added record in slice. 
GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access, - SymbolFlags Flags = SymbolFlags::None); + SymbolFlags Flags = SymbolFlags::None, + bool Inlined = false); /// Add ObjC Class record with attributes from AST. /// diff --git a/clang/include/clang/InstallAPI/Visitor.h b/clang/include/clang/InstallAPI/Visitor.h index ff0a9957aa86bc..71d4d9894f4205 100644 --- a/clang/include/clang/InstallAPI/Visitor.h +++ b/clang/include/clang/InstallAPI/Visitor.h @@ -37,6 +37,9 @@ class InstallAPIVisitor final : public ASTConsumer, /// Collect global variables. bool VisitVarDecl(const VarDecl *D); + /// Collect global functions. + bool VisitFunctionDecl(const FunctionDecl *D); + /// Collect Objective-C Interface declarations. /// Every Objective-C class has an interface declaration that lists all the /// ivars, properties, and methods of the class. diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 240a80e1d3d82c..1edbdf5bb98360 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -19,9 +19,10 @@ namespace clang::installapi { GlobalRecord *FrontendRecordsSlice::addGlobal( StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access, - SymbolFlags Flags) { + SymbolFlags Flags, bool Inlined) { - auto *GR = llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags); + auto *GR = + llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags, Inlined); FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}}); return GR; } diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp index fbe6f1dabe005d..1f2ef08e5aa252 100644 --- a/clang/lib/InstallAPI/Visitor.cpp +++ b/clang/lib/InstallAPI/Visitor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// 
#include "clang/InstallAPI/Visitor.h" +#include "clang/AST/ParentMapContext.h" #include "clang/Basic/Linkage.h" #include "clang/InstallAPI/Frontend.h" #include "llvm/ADT/SmallString.h" @@ -27,6 +28,31 @@ static bool isExported(const NamedDecl *D) { (LV.getVisibility() == DefaultVisibility); } +static bool isInlined(const FunctionDecl *D) { + bool HasInlineAttribute = false; + bool NoCXXAttr = + (!D->getASTContext().getLangOpts().CPlusPlus && + !D->getASTContext().getTargetInfo().getCXXABI().isMicrosoft() && + !D->hasAttr()); + + // Check all redeclarations to find an inline attribute or keyword. + for (const auto *RD : D->redecls()) { + if (!RD->isInlined()) + continue; + HasInlineAttribute = true; + if (!(NoCXXAttr || RD->hasAttr())) + continue; + if (RD->doesThisDeclarationHaveABody() && + RD->isInlineDefinitionExternallyVisible()) + return false; + } + + if (!HasInlineAttribute) + return false; + + return true; +} + static SymbolFlags getFlags(bool WeakDef, bool ThreadLocal) { SymbolFlags Result = SymbolFlags::None; if (WeakDef) @@ -204,4 +230,56 @@ bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) { return true; } +bool InstallAPIVisitor::VisitFunctionDecl(const FunctionDecl *D) { + if (const CXXMethodDecl *M = dyn_cast(D)) { + // Skip member function in class templates. + if (M->getParent()->getDescribedClassTemplate() != nullptr) + return true; + + // Skip methods in CXX RecordDecls. + for (auto P : D->getASTContext().getParents(*M)) { + if (P.get()) + return true; + } + + // Skip CXX ConstructorDecls and DestructorDecls. + if (isa(M) || isa(M)) + return true; + } + + // Skip templated functions. 
+ switch (D->getTemplatedKind()) { + case FunctionDecl::TK_NonTemplate: + case FunctionDecl::TK_DependentNonTemplate: + break; + case FunctionDecl::TK_MemberSpecialization: + case FunctionDecl::TK_FunctionTemplateSpecialization: + if (auto *TempInfo = D->getTemplateSpecializationInfo()) { + if (!TempInfo->isExplicitInstantiationOrSpecialization()) + return true; + } + break; + case FunctionDecl::TK_FunctionTemplate: + case FunctionDecl::TK_DependentFunctionTemplateSpecialization: + return true; + } + + auto Access = getAccessForDecl(D); + if (!Access) + return true; + auto Name = getMangledName(D); + const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(D); + const bool ExplicitInstantiation = D->getTemplateSpecializationKind() == + TSK_ExplicitInstantiationDeclaration; + const bool WeakDef = ExplicitInstantiation || D->hasAttr(); + const bool Inlined = isInlined(D); + const RecordLinkage Linkage = (Inlined || !isExported(D)) + ? RecordLinkage::Internal + : RecordLinkage::Exported; + Ctx.Slice->addGlobal(Name, Linkage, GlobalRecord::Kind::Function, Avail, D, + *Access, getFlags(WeakDef, /*ThreadLocal=*/false), + Inlined); + return true; +} + } // namespace clang::installapi diff --git a/clang/test/InstallAPI/functions.test b/clang/test/InstallAPI/functions.test new file mode 100644 index 00000000000000..527965303cb351 --- /dev/null +++ b/clang/test/InstallAPI/functions.test @@ -0,0 +1,78 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json + +// RUN: clang-installapi -target arm64-apple-macos13.1 \ +// RUN: -I%t/usr/include -I%t/usr/local/include \ +// RUN: -install_name @rpath/lib/libfunctions.dylib \ +// RUN: %t/inputs.json -o %t/outputs.tbd 2>&1 | FileCheck %s --allow-empty +// RUN: llvm-readtapi -compare %t/outputs.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty + +// CHECK-NOT: error: +// CHECK-NOT: warning: + +//--- usr/include/functions.h +inline int inlined_func(void) { return 
1;} +int public(int a); + +//--- usr/local/include/private_functions.h +__attribute__((visibility("hidden"))) +void hidden(void); + +//--- inputs.json.in +{ + "headers": [ { + "path" : "DSTROOT/usr/include/functions.h", + "type" : "public" + }, + { + "path" : "DSTROOT/usr/local/include/private_functions.h", + "type" : "private" + } + ], + "version": "3" +} + +//--- expected.tbd +{ + "main_library": { + "compatibility_versions": [ + { + "version": "0" + } + ], + "current_versions": [ + { + "version": "0" + } + ], + "exported_symbols": [ + { + "text": { + "global": [ + "_public" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "@rpath/lib/libfunctions.dylib" + } + ], + "target_info": [ + { + "min_deployment": "13.1", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 7d45e999448d9f..b9c36eab2ad3b7 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -112,7 +112,9 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM, for (const Arg *A : ArgList) { if (A->isClaimed()) continue; - FrontendArgs.emplace_back(A->getAsString(ArgList)); + + FrontendArgs.emplace_back(A->getSpelling()); + llvm::copy(A->getValues(), std::back_inserter(FrontendArgs)); } FrontendArgs.push_back("-fsyntax-only"); } diff --git a/llvm/include/llvm/TextAPI/Record.h b/llvm/include/llvm/TextAPI/Record.h index 867d6a23588326..98639b064eaadd 100644 --- a/llvm/include/llvm/TextAPI/Record.h +++ b/llvm/include/llvm/TextAPI/Record.h @@ -103,8 +103,8 @@ class GlobalRecord : public Record { }; GlobalRecord(StringRef Name, RecordLinkage Linkage, SymbolFlags Flags, - Kind GV) - : Record({Name, Linkage, Flags}), GV(GV) {} + Kind GV, bool Inlined) + : Record({Name, Linkage, Flags}), GV(GV), Inlined(Inlined) {} bool isFunction() const { return GV == Kind::Function; 
} bool isVariable() const { return GV == Kind::Variable; } @@ -112,9 +112,11 @@ class GlobalRecord : public Record { if (GV == Kind::Unknown) GV = V; } + bool isInlined() const { return Inlined; } private: Kind GV; + bool Inlined = false; }; // Define Objective-C instance variable records. diff --git a/llvm/include/llvm/TextAPI/RecordsSlice.h b/llvm/include/llvm/TextAPI/RecordsSlice.h index 57b23e5ea29e71..f934cf7607f1fd 100644 --- a/llvm/include/llvm/TextAPI/RecordsSlice.h +++ b/llvm/include/llvm/TextAPI/RecordsSlice.h @@ -53,10 +53,13 @@ class RecordsSlice { /// \param Linkage The linkage of symbol. /// \param GV The kind of global. /// \param Flags The flags that describe attributes of the symbol. + /// \param Inlined Whether declaration is inlined, only applicable to + /// functions. /// \return The non-owning pointer to added record in slice. GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, - SymbolFlags Flags = SymbolFlags::None); + SymbolFlags Flags = SymbolFlags::None, + bool Inlined = false); /// Add ObjC Class record. 
/// diff --git a/llvm/lib/TextAPI/RecordsSlice.cpp b/llvm/lib/TextAPI/RecordsSlice.cpp index db52a2cdd85c9c..111a1fa6eaf43b 100644 --- a/llvm/lib/TextAPI/RecordsSlice.cpp +++ b/llvm/lib/TextAPI/RecordsSlice.cpp @@ -171,8 +171,8 @@ ObjCIVarRecord *RecordsSlice::findObjCIVar(bool IsScopedName, } GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage, - GlobalRecord::Kind GV, - SymbolFlags Flags) { + GlobalRecord::Kind GV, SymbolFlags Flags, + bool Inlined) { if (GV == GlobalRecord::Kind::Function) Flags |= SymbolFlags::Text; else if (GV == GlobalRecord::Kind::Variable) @@ -182,7 +182,7 @@ GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage, auto Result = Globals.insert({Name, nullptr}); if (Result.second) Result.first->second = - std::make_unique(Name, Linkage, Flags, GV); + std::make_unique(Name, Linkage, Flags, GV, Inlined); else { updateLinkage(Result.first->second.get(), Linkage); updateFlags(Result.first->second.get(), Flags); diff --git a/llvm/unittests/TextAPI/RecordTests.cpp b/llvm/unittests/TextAPI/RecordTests.cpp index 37289eca1bdf6b..89ffbc4275e0d6 100644 --- a/llvm/unittests/TextAPI/RecordTests.cpp +++ b/llvm/unittests/TextAPI/RecordTests.cpp @@ -19,7 +19,7 @@ TEST(TAPIRecord, Simple) { GlobalRecord API{"_sym", RecordLinkage::Rexported, SymbolFlags::Rexported | SymbolFlags::Text | SymbolFlags::ThreadLocalValue, - GlobalRecord::Kind::Function}; + GlobalRecord::Kind::Function, /*Inlined=*/false}; EXPECT_TRUE(API.isExported()); EXPECT_TRUE(API.isText()); EXPECT_TRUE(API.isRexported()); @@ -30,6 +30,7 @@ TEST(TAPIRecord, Simple) { EXPECT_FALSE(API.isWeakDefined()); EXPECT_FALSE(API.isWeakReferenced()); EXPECT_FALSE(API.isVariable()); + EXPECT_FALSE(API.isInlined()); } TEST(TAPIRecord, SimpleObjC) { From 3712edbdbb79e0169acf0c57e111f3195006c013 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 7 Mar 2024 15:37:45 -0800 Subject: [PATCH 118/158] [libc] finish documenting c23 additions (#84383) - [libc] finish 
documenting c23 additions - sort according to appearance in Annex B and section 7 --- libc/docs/c23.rst | 112 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 33 deletions(-) diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst index ec9d40947cc567..24cef8539393df 100644 --- a/libc/docs/c23.rst +++ b/libc/docs/c23.rst @@ -15,36 +15,15 @@ Implementation Status (It's helpful to review 'Annex B (Informative) Library Summary' for these.) -New headers: - -* stdbit.h -* stdckdint.h (|check|, macros are only defined with `__GNUC__` builtins) Additions: -* uchar.h - - * mbrtoc8 - * c8rtomb - * char*_t - -* string.h - - * memset_explicit - * memccpy - * strdup - * strndup - -* time.h - - * gmtime_r - * localtime_r - * timegm - * timespec_getres - * strftime conversion specifiers +* fenv.h - * 0b - * 0B + * fesetexcept + * fetestexceptflag + * fegetmode + * fesetmode * math.h * acospi* @@ -96,20 +75,87 @@ Additions: * dfmal * fsqrt* * dsqrtl -* fenv.h - - * fesetexcept - * fetestexceptflag - * fegetmode - * fesetmode +* stdbit.h (New header) +* stdckdint.h (New header) |check| * stddef.h * unreachable * stdlib.h + * strfromd + * strfromf + * strfroml * free_sized * free_aligned_sized * memalignment +* string.h + + * memset_explicit |check| + * memccpy + * strdup + * strndup * tgmath.h - * + * acospi + * asinpi + * atan2pi + * atanpi + * compoundn + * cospi + * erf + * exp10m1 + * exp10 + * exp2m1 + * fmaximum + * fmaximum_mag + * fmaximum_num + * fmaximum_mag_num + * fminimum + * fminimum_mag + * fminimum_num + * fminimum_mag_num + * fromfpx + * fromfp + * llogb + * log10p1 + * log2p1 + * logp1 + * nextdown + * nextup + * pown + * powr + * rootn + * roundeven + * rsqrt + * scalbn + * sinpi + * tanpi + * ufromfpx + * ufromfp + * fadd + * dadd + * fsub + * dsub + * fmul + * dmul + * fdiv + * ddiv + * ffma + * dfma + * fsqrt + * dsqrt +* time.h + + * gmtime_r + * localtime_r + * timegm + * timespec_getres + * strftime conversion specifiers + + * 0b 
+ * 0B +* uchar.h + + * mbrtoc8 + * c8rtomb + * char*_t From 293ec4865bfcb6df2091ef4bcce706a566794b5c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 7 Mar 2024 15:38:16 -0800 Subject: [PATCH 119/158] [libc] rename cpp::count_ones to cpp::popcount to better mirror std:: (#84388) libc/src/__support/CPP/bit.h and cpp:: is meant to mirror std::. Fix the TODO. --- libc/src/__support/CPP/bit.h | 10 ++++------ libc/src/__support/UInt.h | 2 +- libc/src/stdbit/stdc_count_ones_uc.cpp | 2 +- libc/src/stdbit/stdc_count_ones_ui.cpp | 2 +- libc/src/stdbit/stdc_count_ones_ul.cpp | 2 +- libc/src/stdbit/stdc_count_ones_ull.cpp | 2 +- libc/src/stdbit/stdc_count_ones_us.cpp | 2 +- libc/test/src/__support/CPP/bit_test.cpp | 4 ++-- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 6b625b0c97a365..9c74a346949f0c 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -269,14 +269,12 @@ first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } -/// Count number of 1's aka population count or hamming weight. +/// Count number of 1's aka population count or Hamming weight. /// /// Only unsigned integral types are allowed. 
-// TODO: rename as 'popcount' to follow the standard -// https://en.cppreference.com/w/cpp/numeric/popcount template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> -count_ones(T value) { +popcount(T value) { int count = 0; for (int i = 0; i != cpp::numeric_limits::digits; ++i) if ((value >> i) & 0x1) @@ -285,7 +283,7 @@ count_ones(T value) { } #define ADD_SPECIALIZATION(TYPE, BUILTIN) \ template <> \ - [[nodiscard]] LIBC_INLINE constexpr int count_ones(TYPE value) { \ + [[nodiscard]] LIBC_INLINE constexpr int popcount(TYPE value) { \ return BUILTIN(value); \ } ADD_SPECIALIZATION(unsigned char, __builtin_popcount) @@ -300,7 +298,7 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> count_zeros(T value) { - return count_ones(static_cast(~value)); + return popcount(static_cast(~value)); } } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 925de8764715da..e899a79684b739 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -979,7 +979,7 @@ has_single_bit(T value) { for (auto word : value.val) { if (word == 0) continue; - bits += count_ones(word); + bits += popcount(word); if (bits > 1) return false; } diff --git a/libc/src/stdbit/stdc_count_ones_uc.cpp b/libc/src/stdbit/stdc_count_ones_uc.cpp index 5a7314caa3baa0..1e998ff521b7db 100644 --- a/libc/src/stdbit/stdc_count_ones_uc.cpp +++ b/libc/src/stdbit/stdc_count_ones_uc.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_uc, (unsigned char value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ui.cpp b/libc/src/stdbit/stdc_count_ones_ui.cpp index 289f4bac31f7b8..e457dd793db33d 100644 --- a/libc/src/stdbit/stdc_count_ones_ui.cpp +++ b/libc/src/stdbit/stdc_count_ones_ui.cpp @@ -14,7 +14,7 @@ 
namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ui, (unsigned value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ul.cpp b/libc/src/stdbit/stdc_count_ones_ul.cpp index 83f3279d791937..ed86653fc7ee2e 100644 --- a/libc/src/stdbit/stdc_count_ones_ul.cpp +++ b/libc/src/stdbit/stdc_count_ones_ul.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ul, (unsigned long value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ull.cpp b/libc/src/stdbit/stdc_count_ones_ull.cpp index 104788aaf21265..c5ecc3cda6477a 100644 --- a/libc/src/stdbit/stdc_count_ones_ull.cpp +++ b/libc/src/stdbit/stdc_count_ones_ull.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ull, (unsigned long long value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_us.cpp b/libc/src/stdbit/stdc_count_ones_us.cpp index 4b6ff0b94b626a..465c5c374e7c64 100644 --- a/libc/src/stdbit/stdc_count_ones_us.cpp +++ b/libc/src/stdbit/stdc_count_ones_us.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_us, (unsigned short value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 25a80ca9209c2f..d3f56d5bad83d3 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -260,9 +260,9 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { } TYPED_TEST(LlvmLibcBitTest, CountOnes, 
UnsignedTypesNoBigInt) { - EXPECT_EQ(count_ones(T(0)), 0); + EXPECT_EQ(popcount(T(0)), 0); for (int i = 0; i != cpp::numeric_limits::digits; ++i) - EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), + EXPECT_EQ(popcount(cpp::numeric_limits::max() >> i), cpp::numeric_limits::digits - i); } From f862265733d65efbfd819408b594b3b2854491d2 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 7 Mar 2024 15:39:41 -0800 Subject: [PATCH 120/158] AMDGPU: Use True16Predicate for UseRealTrue16Insts in VOP2 Reals (#84394) We can not use OtherPredicates or SubtargetPredicate because they should be copied from pseudo to real, and we should not override them. --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 13fe79b4759608..53578682e00246 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -112,7 +112,7 @@ class VOP2_Real : VOP2_Real { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1272,7 +1272,7 @@ class VOP2_DPP16_Gen op, VOP2_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP16 { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1301,7 +1301,7 @@ class VOP2_DPP8_Gen op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8 { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = 
!if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } From a01e9ce86f4c1bc9af819902db9f287b6d23f54f Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 7 Mar 2024 15:38:33 -0800 Subject: [PATCH 121/158] [AArc64][GlobalISel] Fix legalizer assert for G_INSERT_VECTOR_ELT We should moreElements <3 x s1> to <4 x s1> before we try to widen the element, otherwise we end up with a <3 x s21> nonsense type. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 + .../GlobalISel/legalize-insert-vector-elt.mir | 70 +++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index ad389cfc75aa94..36adada2796531 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -884,6 +884,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64})) + .moreElementsToNextPow2(0) .widenVectorEltsToVectorMinSize(0, 64); getActionDefinitionsBuilder(G_BUILD_VECTOR) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir index 6f6cf2cc165b9f..e12353c7ef5bec 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir @@ -216,3 +216,73 @@ body: | $q0 = COPY %2(<2 x s64>) RET_ReallyLR ... 
+--- +name: v3s8_crash +body: | + ; CHECK-LABEL: name: v3s8_crash + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w1, $w2, $w3, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C2]](s16), [[C1]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = 
G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY]](p0) :: (store (s32), align 16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + liveins: $w1, $w2, $w3, $x0 + + %0:_(p0) = COPY $x0 + %2:_(s32) = COPY $w1 + %3:_(s32) = COPY $w2 + %4:_(s32) = COPY $w3 + %5:_(<3 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32) + %1:_(<3 x s8>) = G_TRUNC %5(<3 x s32>) + %8:_(s64) = G_CONSTANT i64 0 + %11:_(s8) = G_IMPLICIT_DEF + %7:_(s8) = G_CONSTANT i8 0 + %10:_(<3 x s8>) = G_BUILD_VECTOR %7(s8), %11(s8), %11(s8) + + bb.2: + %14:_(s64) = G_CONSTANT i64 0 + %15:_(s8) = G_CONSTANT i8 0 + %6:_(<3 x s8>) = G_INSERT_VECTOR_ELT %1, %15(s8), %14(s64) + %9:_(<12 x s8>) = G_SHUFFLE_VECTOR %6(<3 x s8>), %10, shufflemask(0, 3, 3, 3, 1, 3, 3, 3, 2, 3, 3, 3) + %12:_(<3 x s32>) = G_BITCAST %9(<12 x s8>) + %13:_(<3 x 
s32>) = G_UITOFP %12(<3 x s32>) + G_STORE %13(<3 x s32>), %0(p0) :: (store (<3 x s32>)) + G_BR %bb.2 + +... From 3e5afba8ef9319956d288ff755df3c442433eb88 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 7 Mar 2024 15:41:12 -0800 Subject: [PATCH 122/158] [NFC] [hwasan] be consistent about how to get integer types (#84396) --- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 236ee8910d46ab..289183ecf0f286 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -412,8 +412,8 @@ class HWAddressSanitizer { Type *VoidTy = Type::getVoidTy(M.getContext()); Type *IntptrTy; PointerType *PtrTy; - Type *Int8Ty; - Type *Int32Ty; + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); Type *Int64Ty = Type::getInt64Ty(M.getContext()); bool CompileKernel; @@ -615,8 +615,6 @@ void HWAddressSanitizer::initializeModule() { IRBuilder<> IRB(*C); IntptrTy = IRB.getIntPtrTy(DL); PtrTy = IRB.getPtrTy(); - Int8Ty = IRB.getInt8Ty(); - Int32Ty = IRB.getInt32Ty(); HwasanCtorFunction = nullptr; From ddf79deb42d901fbb732e56464efbf93bc444070 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Fri, 8 Mar 2024 07:42:01 +0800 Subject: [PATCH 123/158] [Asan] Fix -Wunused-private-field in non-assertion builds (NFC) llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp:650:13: error: private field 'OwnerFn' is not used [-Werror,-Wunused-private-field] Function *OwnerFn = nullptr; ^ 1 error generated. 
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index c95a50a033b1b2..db75eec21a3745 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -656,6 +656,7 @@ class RuntimeCallInserter { ArrayRef Args = {}, const Twine &Name = "") { assert(IRB.GetInsertBlock()->getParent() == OwnerFn); + (void)OwnerFn; return IRB.CreateCall(Callee, Args, Name, nullptr); } }; From 8bf8d36f8e82a1e2d32f33dbe7369d9cecd57f46 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Thu, 7 Mar 2024 23:53:27 +0000 Subject: [PATCH 124/158] [compiler-rt][fuzzer] Reland "SetThreadName windows implementation" (#83562) Following-up on GH-76761. --- compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 25 ++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 71770166805f78..0dbcec8b5f2215 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -21,10 +21,15 @@ #include #include #include +// clang-format off #include - -// This must be included after windows.h. +// These must be included after windows.h. +// archicture need to be set before including +// libloaderapi +#include +#include #include +// clang-format on namespace fuzzer { @@ -234,8 +239,20 @@ size_t PageSize() { } void SetThreadName(std::thread &thread, const std::string &name) { - // TODO ? - // to UTF-8 then SetThreadDescription ? 
+ typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); + HMODULE kbase = GetModuleHandleA("KernelBase.dll"); + proc ThreadNameProc = + reinterpret_cast(GetProcAddress(kbase, "SetThreadDescription")); + if (proc) { + std::wstring buf; + auto sz = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0); + if (sz > 0) { + buf.resize(sz); + if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, &buf[0], sz) > 0) { + (void)ThreadNameProc(thread.native_handle(), buf.c_str()); + } + } + } } } // namespace fuzzer From 26fa4409572ad81c5522165ba2a831845f4d0635 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 7 Mar 2024 15:50:25 -0800 Subject: [PATCH 125/158] [GlobalISel] Fix yet another pointer type invalid combining issue, this time in tryFoldSelectOfConstants() --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 ++ .../AArch64/GlobalISel/combine-select.mir | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 4862207d53f492..ab055b723dbb1f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6369,6 +6369,9 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, if (CondTy != LLT::scalar(1)) return false; + if (TrueTy.isPointer()) + return false; + // Both are scalars. std::optional TrueOpt = getIConstantVRegValWithLookThrough(True, MRI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir index 7b73c8cec47746..2bf7e84a379ba0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir @@ -859,3 +859,40 @@ body: | RET_ReallyLR implicit $x0 ... 
+--- +name: dont_combine_pointer_type_select_of_constant +alignment: 4 +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: dont_combine_pointer_type_select_of_constant + ; CHECK: liveins: $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[TRUNC1]](s1), [[C]], [[INTTOPTR]] + ; CHECK-NEXT: G_STORE [[SELECT]](p0), [[COPY1]](p0) :: (store (p0)) + ; CHECK-NEXT: RET_ReallyLR + %3:_(s32) = COPY $w0 + %2:_(s8) = G_TRUNC %3(s32) + %1:_(p0) = COPY $x1 + %4:_(s8) = G_ASSERT_ZEXT %2, 1 + %0:_(s1) = G_TRUNC %4(s8) + %6:_(p0) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 -1 + %7:_(p0) = G_INTTOPTR %8(s64) + %5:_(p0) = G_SELECT %0(s1), %6, %7 + G_STORE %5(p0), %1(p0) :: (store (p0)) + RET_ReallyLR + +... From b408241d0ad9ce009b49018fe1e9838887abf3c1 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Fri, 8 Mar 2024 00:00:26 +0000 Subject: [PATCH 126/158] [compiler-rt] adding fchmodat2 syscall introduced in Linux 6.6. 
(#82275) --- .../lib/sanitizer_common/sanitizer_common_syscalls.inc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index c10943b3e48793..b3161690f3ce8a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2808,6 +2808,15 @@ PRE_SYSCALL(fchownat) POST_SYSCALL(fchownat) (long res, long dfd, const void *filename, long user, long group, long flag) {} +PRE_SYSCALL(fchmodat2)(long dfd, const void *filename, long mode, long flag) { + if (filename) + PRE_READ(filename, + __sanitizer::internal_strlen((const char *)filename) + 1); +} + +POST_SYSCALL(fchmodat2) +(long res, long dfd, const void *filename, long mode, long flag) {} + PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) { if (filename) PRE_READ(filename, From 487cfbe494413e12123b55dead5ef8742ef49fb2 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 8 Mar 2024 00:01:37 +0000 Subject: [PATCH 127/158] [Clang] Implement constexpr support for `__builtin_popcountg` (#84318) --- clang/docs/LanguageExtensions.rst | 1 + clang/docs/ReleaseNotes.rst | 5 +++++ clang/include/clang/Basic/Builtins.td | 2 +- clang/lib/AST/ExprConstant.cpp | 1 + clang/test/Sema/constant-builtins-2.c | 7 +++++++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 2b54dffd058a35..06af93fd3c15ca 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5378,6 +5378,7 @@ The following builtin intrinsics can be used in constant expressions: * ``__builtin_popcount`` * ``__builtin_popcountl`` * ``__builtin_popcountll`` +* ``__builtin_popcountg`` * ``__builtin_rotateleft8`` * ``__builtin_rotateleft16`` * ``__builtin_rotateleft32`` diff --git a/clang/docs/ReleaseNotes.rst 
b/clang/docs/ReleaseNotes.rst index 42c4a7c4d4bd14..fa23c215790f11 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -157,6 +157,11 @@ Non-comprehensive list of changes in this release - ``__builtin_addc``, ``__builtin_subc``, and the other sizes of those builtins are now constexpr and may be used in constant expressions. +- Added ``__builtin_popcountg`` as a type-generic alternative to + ``__builtin_popcount{,l,ll}`` with support for any unsigned integer type. Like + the previous builtins, this new builtin is constexpr and may be used in + constant expressions. + New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index a81131d82c4cb4..9c703377ca8d3e 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -706,7 +706,7 @@ def Popcount : Builtin, BitInt_Long_LongLongTemplate { def Popcountg : Builtin { let Spellings = ["__builtin_popcountg"]; - let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Attributes = [NoThrow, Const, Constexpr, CustomTypeChecking]; let Prototype = "int(...)"; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d8ca35740fbc35..4a7c7755e1d6fd 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12483,6 +12483,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: + case Builtin::BI__builtin_popcountg: case Builtin::BI__popcnt16: // Microsoft variants of popcount case Builtin::BI__popcnt: case Builtin::BI__popcnt64: { diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c index 2bdd7b06daabfe..0935abe4c65fbe 100644 --- a/clang/test/Sema/constant-builtins-2.c +++ b/clang/test/Sema/constant-builtins-2.c @@ -237,6 +237,13 @@ char popcount7[__builtin_popcountl(~0L) == 
BITSIZE(long) ? 1 : -1]; char popcount8[__builtin_popcountll(0LL) == 0 ? 1 : -1]; char popcount9[__builtin_popcountll(0xF0F0LL) == 8 ? 1 : -1]; char popcount10[__builtin_popcountll(~0LL) == BITSIZE(long long) ? 1 : -1]; +char popcount11[__builtin_popcountg(0U) == 0 ? 1 : -1]; +char popcount12[__builtin_popcountg(0xF0F0U) == 8 ? 1 : -1]; +char popcount13[__builtin_popcountg(~0U) == BITSIZE(int) ? 1 : -1]; +char popcount14[__builtin_popcountg(~0UL) == BITSIZE(long) ? 1 : -1]; +char popcount15[__builtin_popcountg(~0ULL) == BITSIZE(long long) ? 1 : -1]; +char popcount16[__builtin_popcountg(~(unsigned __int128)0) == BITSIZE(__int128) ? 1 : -1]; +char popcount17[__builtin_popcountg(~(unsigned _BitInt(128))0) == BITSIZE(_BitInt(128)) ? 1 : -1]; char parity1[__builtin_parity(0) == 0 ? 1 : -1]; char parity2[__builtin_parity(0xb821) == 0 ? 1 : -1]; From e932fe880b69a6cd13b4f29678c7f143540f1999 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Fri, 8 Mar 2024 00:15:22 +0000 Subject: [PATCH 128/158] [compiler-rt][Fuzzer] fix windows typo (#84407) --- compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 0dbcec8b5f2215..db80eb383885e6 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -243,7 +243,7 @@ void SetThreadName(std::thread &thread, const std::string &name) { HMODULE kbase = GetModuleHandleA("KernelBase.dll"); proc ThreadNameProc = reinterpret_cast(GetProcAddress(kbase, "SetThreadDescription")); - if (proc) { + if (ThreadNameProc) { std::wstring buf; auto sz = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0); if (sz > 0) { From 0d4978f3cf8f917d88c19ec0ba3b1b3ef092cef1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Mar 2024 15:45:26 -0800 Subject: [PATCH 129/158] [RISCV] Update some tests I missed in 
909ab0e0d1903ad2329ca9fdf248d21330f9437f. NFC --- llvm/test/CodeGen/RISCV/forced-atomics.ll | 2 +- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 60 ++++++++++++----------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index 2b198afb47a9ae..659e0748dd5325 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -3567,8 +3567,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: neg a3, a0 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 6bfacc3e9814b4..630d16e7c888b9 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1324,8 +1324,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: lw a3, 12(sp) ; RV32IF-NEXT: and a5, a2, a1 ; RV32IF-NEXT: beqz a5, .LBB20_7 ; RV32IF-NEXT: # %bb.6: # %entry @@ -1334,17 +1334,18 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: .LBB20_7: ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB20_8: # %entry -; RV32IF-NEXT: and a4, a2, a4 +; RV32IF-NEXT: and a3, a2, a3 ; RV32IF-NEXT: or a0, a0, a5 -; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: and a2, a2, a4 ; RV32IF-NEXT: bnez a0, .LBB20_10 ; RV32IF-NEXT: # %bb.9: -; RV32IF-NEXT: or a0, a2, a4 -; RV32IF-NEXT: snez a1, a0 +; RV32IF-NEXT: snez a0, a3 +; RV32IF-NEXT: snez a1, a2 +; RV32IF-NEXT: or a1, a1, a0 ; RV32IF-NEXT: .LBB20_10: # %entry ; RV32IF-NEXT: neg a1, a1 ; RV32IF-NEXT: and a0, a1, a2 -; RV32IF-NEXT: and a1, a1, a4 
+; RV32IF-NEXT: and a1, a1, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1403,8 +1404,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.4: # %entry ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) ; RV32IFD-NEXT: and a5, a2, a1 ; RV32IFD-NEXT: beqz a5, .LBB20_7 ; RV32IFD-NEXT: # %bb.6: # %entry @@ -1413,17 +1414,18 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB20_7: ; RV32IFD-NEXT: snez a1, a0 ; RV32IFD-NEXT: .LBB20_8: # %entry -; RV32IFD-NEXT: and a4, a2, a4 +; RV32IFD-NEXT: and a3, a2, a3 ; RV32IFD-NEXT: or a0, a0, a5 -; RV32IFD-NEXT: and a2, a2, a3 +; RV32IFD-NEXT: and a2, a2, a4 ; RV32IFD-NEXT: bnez a0, .LBB20_10 ; RV32IFD-NEXT: # %bb.9: -; RV32IFD-NEXT: or a0, a2, a4 -; RV32IFD-NEXT: snez a1, a0 +; RV32IFD-NEXT: snez a0, a3 +; RV32IFD-NEXT: snez a1, a2 +; RV32IFD-NEXT: or a1, a1, a0 ; RV32IFD-NEXT: .LBB20_10: # %entry ; RV32IFD-NEXT: neg a1, a1 ; RV32IFD-NEXT: and a0, a1, a2 -; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: and a1, a1, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -1594,8 +1596,8 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB23_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1604,17 +1606,18 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: .LBB23_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB23_8: # %entry -; RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, 
a4 -; RV32-NEXT: snez a1, a0 +; RV32-NEXT: snez a0, a3 +; RV32-NEXT: snez a1, a2 +; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: .LBB23_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1847,8 +1850,8 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB26_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1857,17 +1860,18 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: .LBB26_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB26_8: # %entry -; RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: bnez a0, .LBB26_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 -; RV32-NEXT: snez a1, a0 +; RV32-NEXT: snez a0, a3 +; RV32-NEXT: snez a1, a2 +; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: .LBB26_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret From fcd2d483251605f1b6cdace0ce5baf5dfd31b880 Mon Sep 17 00:00:00 2001 From: vadikp-intel Date: Thu, 7 Mar 2024 16:28:03 -0800 Subject: [PATCH 130/158] [OpenMP] runtime support for efficient partitioning of collapsed triangular loops (#83939) This PR adds OMP runtime support for more efficient partitioning of certain types of collapsed loops that can be used by compilers that support loop collapsing (i.e. MSVC) to achieve more optimal thread load balancing. In particular, this PR addresses double nested upper and lower isosceles triangular loops of the following types 1. 
lower triangular 'less_than' for (int i=0; i --- openmp/runtime/src/kmp_collapse.cpp | 311 ++++++++++++++++++ openmp/runtime/src/kmp_collapse.h | 11 +- .../omp_for_collapse_LowerTriangularLess.c | 124 +++++++ ...mp_for_collapse_LowerTriangularLessEqual.c | 124 +++++++ .../for/omp_for_collapse_UpperTriangular.c | 124 +++++++ 5 files changed, 692 insertions(+), 2 deletions(-) create mode 100644 openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c create mode 100644 openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c create mode 100644 openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c diff --git a/openmp/runtime/src/kmp_collapse.cpp b/openmp/runtime/src/kmp_collapse.cpp index 2c410ca9b6030e..569d2c1508319d 100644 --- a/openmp/runtime/src/kmp_collapse.cpp +++ b/openmp/runtime/src/kmp_collapse.cpp @@ -1272,6 +1272,304 @@ void kmp_calc_original_ivs_for_end( } } +/************************************************************************** + * Identify nested loop structure - loops come in the canonical form + * Lower triangle matrix: i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0; j <= 0/-1+1*i; j++ {0,0}:{0/-1,1} + * Upper Triangle matrix + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0+1*i; j <= N; j++ {0,1}:{N,0} + * ************************************************************************/ +nested_loop_type_t +kmp_identify_nested_loop_structure(/*in*/ bounds_info_t *original_bounds_nest, + /*in*/ kmp_index_t n) { + // only 2-level nested loops are supported + if (n != 2) { + return nested_loop_type_unkown; + } + // loops must be canonical + KMP_ASSERT( + (original_bounds_nest[0].comparison == comparison_t::comp_less_or_eq) && + (original_bounds_nest[1].comparison == comparison_t::comp_less_or_eq)); + // check outer loop bounds: for triangular need to be {0,0}:{N,0} + kmp_uint64 outer_lb0_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 outer_ub0_u64 = 
kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb1_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb1_u64); + kmp_uint64 outer_ub1_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub1_u64); + if (outer_lb0_u64 != 0 || outer_lb1_u64 != 0 || outer_ub1_u64 != 0) { + return nested_loop_type_unkown; + } + // check inner bounds to determine triangle type + kmp_uint64 inner_lb0_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].lb0_u64); + kmp_uint64 inner_ub0_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + kmp_uint64 inner_lb1_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].lb1_u64); + kmp_uint64 inner_ub1_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub1_u64); + // lower triangle loop inner bounds need to be {0,0}:{0/-1,1} + if (inner_lb0_u64 == 0 && inner_lb1_u64 == 0 && + (inner_ub0_u64 == 0 || inner_ub0_u64 == -1) && inner_ub1_u64 == 1) { + return nested_loop_type_lower_triangular_matrix; + } + // upper triangle loop inner bounds need to be {0,1}:{N,0} + if (inner_lb0_u64 == 0 && inner_lb1_u64 == 1 && + inner_ub0_u64 == outer_ub0_u64 && inner_ub1_u64 == 0) { + return nested_loop_type_upper_triangular_matrix; + } + return nested_loop_type_unkown; +} + +/************************************************************************** + * SQRT Approximation: https://math.mit.edu/~stevenj/18.335/newton-sqrt.pdf + * Start point is x so the result is always > sqrt(x) + * The method has uniform convergence, PRECISION is set to 0.1 + * ************************************************************************/ +#define level_of_precision 0.1 +double sqrt_newton_approx(/*in*/ kmp_uint64 x) { + double sqrt_old = 0.; + double sqrt_new = (double)x; + do { + sqrt_old = sqrt_new; + sqrt_new = (sqrt_old + x / sqrt_old) / 2; + } 
while ((sqrt_old - sqrt_new) > level_of_precision); + return sqrt_new; +} + +/************************************************************************** + * Handle lower triangle matrix in the canonical form + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0; j <= 0/-1 + 1*i; j++ {0,0}:{0/-1,1} + * ************************************************************************/ +void kmp_handle_lower_triangle_matrix( + /*in*/ kmp_uint32 nth, + /*in*/ kmp_uint32 tid, + /*in */ kmp_index_t n, + /*in/out*/ bounds_info_t *original_bounds_nest, + /*out*/ bounds_info_t *chunk_bounds_nest) { + + // transfer loop types from the original loop to the chunks + for (kmp_index_t i = 0; i < n; ++i) { + chunk_bounds_nest[i] = original_bounds_nest[i]; + } + // cleanup iv variables + kmp_uint64 outer_ub0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 inner_ub0 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + // calculate the chunk's lower and upper bounds + // the total number of iterations in the loop is the sum of the arithmetic + // progression from the outer lower to outer upper bound (inclusive since the + // loop is canonical) note that less_than inner loops (inner_ub0 = -1) + // effectively make the progression 1-based making N = (outer_ub0 - inner_lb0 + // + 1) -> N - 1 + kmp_uint64 outer_iters = (outer_ub0 - outer_lb0 + 1) + inner_ub0; + kmp_uint64 iter_total = outer_iters * (outer_iters + 1) / 2; + // the current thread's number of iterations: + // each thread gets an equal number of iterations: total number of iterations + // divided by the number of threads plus, if there's a remainder, + // the first threads with the number up to the remainder get an additional + // iteration each to cover it + kmp_uint64 iter_current = + iter_total / nth + ((tid < (iter_total % nth)) ? 
1 : 0); + // cumulative number of iterations executed by all the previous threads: + // threads with the tid below the remainder will have (iter_total/nth+1) + // elements, and so will all threads before them so the cumulative number of + // iterations executed by the all previous will be the current thread's number + // of iterations multiplied by the number of previous threads which is equal + // to the current thread's tid; threads with the number equal or above the + // remainder will have (iter_total/nth) elements so the cumulative number of + // iterations previously executed is its number of iterations multipled by the + // number of previous threads which is again equal to the current thread's tid + // PLUS all the remainder iterations that will have been executed by the + // previous threads + kmp_uint64 iter_before_current = + tid * iter_current + ((tid < iter_total % nth) ? 0 : (iter_total % nth)); + // cumulative number of iterations executed with the current thread is + // the cumulative number executed before it plus its own + kmp_uint64 iter_with_current = iter_before_current + iter_current; + // calculate the outer loop lower bound (lbo) which is the max outer iv value + // that gives the number of iterations that is equal or just below the total + // number of iterations executed by the previous threads, for less_than + // (1-based) inner loops (inner_ub0 == -1) it will be i.e. + // lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0 + // for less_than_equal (0-based) inner loops (inner_ub == 0) it will be: + // i.e. 
lbo*(lbo+1)/2<=iter_before_current => + // lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarily + // using a parameter to control the equation sign + kmp_int64 inner_adjustment = 1 + 2 * inner_ub0; + kmp_uint64 lower_bound_outer = + (kmp_uint64)(sqrt_newton_approx(inner_adjustment * inner_adjustment + + 8 * iter_before_current) + + inner_adjustment) / + 2 - + inner_adjustment; + // calculate the inner loop lower bound which is the remaining number of + // iterations required to hit the total number of iterations executed by the + // previous threads giving the starting point of this thread + kmp_uint64 lower_bound_inner = + iter_before_current - + ((lower_bound_outer + inner_adjustment) * lower_bound_outer) / 2; + // calculate the outer loop upper bound using the same approach as for the + // inner bound except using the total number of iterations executed with the + // current thread + kmp_uint64 upper_bound_outer = + (kmp_uint64)(sqrt_newton_approx(inner_adjustment * inner_adjustment + + 8 * iter_with_current) + + inner_adjustment) / + 2 - + inner_adjustment; + // calculate the inner loop upper bound which is the remaining number of + // iterations required to hit the total number of iterations executed after + // the current thread giving the starting point of the next thread + kmp_uint64 upper_bound_inner = + iter_with_current - + ((upper_bound_outer + inner_adjustment) * upper_bound_outer) / 2; + // adjust the upper bounds down by 1 element to point at the last iteration of + // the current thread the first iteration of the next thread + if (upper_bound_inner == 0) { + // {n,0} => {n-1,n-1} + upper_bound_outer -= 1; + upper_bound_inner = upper_bound_outer; + } else { + // {n,m} => {n,m-1} (m!=0) + upper_bound_inner -= 1; + } + + // assign the values, zeroing out lb1 and ub1 values since the iteration space + // is now one-dimensional + chunk_bounds_nest[0].lb0_u64 = lower_bound_outer; + chunk_bounds_nest[1].lb0_u64 = lower_bound_inner; + 
chunk_bounds_nest[0].ub0_u64 = upper_bound_outer; + chunk_bounds_nest[1].ub0_u64 = upper_bound_inner; + chunk_bounds_nest[0].lb1_u64 = 0; + chunk_bounds_nest[0].ub1_u64 = 0; + chunk_bounds_nest[1].lb1_u64 = 0; + chunk_bounds_nest[1].ub1_u64 = 0; + +#if 0 + printf("tid/nth = %d/%d : From [%llu, %llu] To [%llu, %llu] : Chunks %llu/%llu\n", + tid, nth, chunk_bounds_nest[0].lb0_u64, chunk_bounds_nest[1].lb0_u64, + chunk_bounds_nest[0].ub0_u64, chunk_bounds_nest[1].ub0_u64, iter_current, iter_total); +#endif +} + +/************************************************************************** + * Handle upper triangle matrix in the canonical form + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0+1*i; j <= N; j++ {0,1}:{N,0} + * ************************************************************************/ +void kmp_handle_upper_triangle_matrix( + /*in*/ kmp_uint32 nth, + /*in*/ kmp_uint32 tid, + /*in */ kmp_index_t n, + /*in/out*/ bounds_info_t *original_bounds_nest, + /*out*/ bounds_info_t *chunk_bounds_nest) { + + // transfer loop types from the original loop to the chunks + for (kmp_index_t i = 0; i < n; ++i) { + chunk_bounds_nest[i] = original_bounds_nest[i]; + } + // cleanup iv variables + kmp_uint64 outer_ub0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 inner_ub0 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + // calculate the chunk's lower and upper bounds + // the total number of iterations in the loop is the sum of the arithmetic + // progression from the outer lower to outer upper bound (inclusive since the + // loop is canonical) note that less_than inner loops (inner_ub0 = -1) + // effectively make the progression 1-based making N = (outer_ub0 - inner_lb0 + // + 1) -> N - 1 + kmp_uint64 outer_iters = (outer_ub0 - outer_lb0 + 1); + kmp_uint64 iter_total = outer_iters * 
(outer_iters + 1) / 2; + // the current thread's number of iterations: + // each thread gets an equal number of iterations: total number of iterations + // divided by the number of threads plus, if there's a remainder, + // the first threads with the number up to the remainder get an additional + // iteration each to cover it + kmp_uint64 iter_current = + iter_total / nth + ((tid < (iter_total % nth)) ? 1 : 0); + // cumulative number of iterations executed by all the previous threads: + // threads with the tid below the remainder will have (iter_total/nth+1) + // elements, and so will all threads before them so the cumulative number of + // iterations executed by all the previous will be the current thread's number + // of iterations multiplied by the number of previous threads which is equal + // to the current thread's tid; threads with the number equal or above the + // remainder will have (iter_total/nth) elements so the cumulative number of + // iterations previously executed is its number of iterations multiplied by the + // number of previous threads which is again equal to the current thread's tid + // PLUS all the remainder iterations that will have been executed by the + // previous threads + kmp_uint64 iter_before_current = + tid * iter_current + ((tid < iter_total % nth) ? 0 : (iter_total % nth)); + // cumulative number of iterations executed with the current thread is + // the cumulative number executed before it plus its own + kmp_uint64 iter_with_current = iter_before_current + iter_current; + // calculate the outer loop lower bound (lbo) which is the max outer iv value + // that gives the number of iterations that is equal or just below the total + // number of iterations executed by the previous threads, for less_than + // (1-based) inner loops (inner_ub0 == -1) it will be i.e. + // lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0 + // for less_than_equal (0-based) inner loops (inner_ub == 0) it will be: + // i.e. 
lbo*(lbo+1)/2<=iter_before_current => + // lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarly + // using a parameter to control the equation sign + kmp_uint64 lower_bound_outer = + (kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_before_current) + 1) / 2 - 1; + ; + // calculate the inner loop lower bound which is the remaining number of + // iterations required to hit the total number of iterations executed by the + // previous threads giving the starting point of this thread + kmp_uint64 lower_bound_inner = + iter_before_current - ((lower_bound_outer + 1) * lower_bound_outer) / 2; + // calculate the outer loop upper bound using the same approach as for the + // inner bound except using the total number of iterations executed with the + // current thread + kmp_uint64 upper_bound_outer = + (kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_with_current) + 1) / 2 - 1; + // calculate the inner loop upper bound which is the remaining number of + // iterations required to hit the total number of iterations executed after + // the current thread giving the starting point of the next thread + kmp_uint64 upper_bound_inner = + iter_with_current - ((upper_bound_outer + 1) * upper_bound_outer) / 2; + // adjust the upper bounds down by 1 element to point at the last iteration of + // the current thread and the first iteration of the next thread + if (upper_bound_inner == 0) { + // {n,0} => {n-1,n-1} + upper_bound_outer -= 1; + upper_bound_inner = upper_bound_outer; + } else { + // {n,m} => {n,m-1} (m!=0) + upper_bound_inner -= 1; + } + + // assign the values, zeroing out lb1 and ub1 values since the iteration space + // is now one-dimensional + chunk_bounds_nest[0].lb0_u64 = (outer_iters - 1) - upper_bound_outer; + chunk_bounds_nest[1].lb0_u64 = (outer_iters - 1) - upper_bound_inner; + chunk_bounds_nest[0].ub0_u64 = (outer_iters - 1) - lower_bound_outer; + chunk_bounds_nest[1].ub0_u64 = (outer_iters - 1) - lower_bound_inner; + chunk_bounds_nest[0].lb1_u64 = 0; + 
chunk_bounds_nest[0].ub1_u64 = 0; + chunk_bounds_nest[1].lb1_u64 = 0; + chunk_bounds_nest[1].ub1_u64 = 0; + +#if 0 + printf("tid/nth = %d/%d : From [%llu, %llu] To [%llu, %llu] : Chunks %llu/%llu\n", + tid, nth, chunk_bounds_nest[0].lb0_u64, chunk_bounds_nest[1].lb0_u64, + chunk_bounds_nest[0].ub0_u64, chunk_bounds_nest[1].ub0_u64, iter_current, iter_total); +#endif +} //----------Init API for non-rectangular loops-------------------------------- // Init API for collapsed loops (static, no chunks defined). @@ -1334,6 +1632,19 @@ __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, KMP_DEBUG_ASSERT(tid < nth); + // Handle special cases + nested_loop_type_t loop_type = + kmp_identify_nested_loop_structure(original_bounds_nest, n); + if (loop_type == nested_loop_type_lower_triangular_matrix) { + kmp_handle_lower_triangle_matrix(nth, tid, n, original_bounds_nest, + chunk_bounds_nest); + return TRUE; + } else if (loop_type == nested_loop_type_upper_triangular_matrix) { + kmp_handle_upper_triangle_matrix(nth, tid, n, original_bounds_nest, + chunk_bounds_nest); + return TRUE; + } + CollapseAllocator original_ivs_start(n); if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n, diff --git a/openmp/runtime/src/kmp_collapse.h b/openmp/runtime/src/kmp_collapse.h index e4870185645de0..1044478554a022 100644 --- a/openmp/runtime/src/kmp_collapse.h +++ b/openmp/runtime/src/kmp_collapse.h @@ -45,6 +45,13 @@ enum loop_type_t : kmp_int32 { loop_type_int64 = 7 }; +// Defining loop types to handle special cases +enum nested_loop_type_t : kmp_int32 { + nested_loop_type_unkown = 0, + nested_loop_type_lower_triangular_matrix = 1, + nested_loop_type_upper_triangular_matrix = 2 +}; + /*! @ingroup WORK_SHARING * Describes the structure for rectangular nested loops. @@ -124,14 +131,14 @@ struct bounds_info_t { // It's represented in kmp_uint64, but each dimention is calculated in // that loop IV type. 
Also dimentions have to be converted to those types // when used in generated code. -typedef kmp_uint64* kmp_point_t; +typedef kmp_uint64 *kmp_point_t; // Array: Number of loop iterations on each nesting level to achieve some point, // in expanded space or in original space. // OMPTODO: move from using iterations to using offsets (iterations multiplied // by steps). For those we need to be careful with the types, as step can be // negative, but it'll remove multiplications and divisions in several places. -typedef kmp_loop_nest_iv_t* kmp_iterations_t; +typedef kmp_loop_nest_iv_t *kmp_iterations_t; // Internal struct with additional info: template struct bounds_info_internalXX_template { diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c new file mode 100644 index 00000000000000..9d742066cf1fc2 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(unsigned upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound - 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type lower triangle <,< - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = 0; j < i; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = 0; j < i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = i; j < upper_bound; j++) { + 
unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c new file mode 100644 index 00000000000000..154ee0f69daa56 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(int upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound + 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type lower triangle <,<= - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = 0; j <= i; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = 0; j <= i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = i + 1; j < upper_bound; j++) { 
+ unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c new file mode 100644 index 00000000000000..452410025be0c9 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(unsigned upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound + 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type upper triangle <,< - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = i; j < upper_bound; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = i; j < upper_bound; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = 0; j < 
i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} From 0cd7942c7f7a6f0c8a749c5f0d6d758e0a6fd9d9 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Thu, 7 Mar 2024 16:34:20 -0800 Subject: [PATCH 131/158] [llvm-dwarfdump] Fix parsing DW_CFA_AARCH64_negate_ra_state (#84128) The saved state of the AARCH64_DWARF_PAUTH_RA_STATE register was not updated, so `llvm-dwarfdump` continued to dump it as `reg34=1` even if the correct value is `0`: ``` > llvm-dwarfdump -v test.o ... 
0000002c 00000024 00000030 FDE cie=00000000 pc=00000030...00000064 Format: DWARF32 DW_CFA_advance_loc: 4 DW_CFA_AARCH64_negate_ra_state: DW_CFA_advance_loc: 4 DW_CFA_def_cfa_offset: +16 DW_CFA_offset: W30 -16 DW_CFA_remember_state: DW_CFA_advance_loc: 16 DW_CFA_def_cfa_offset: +0 DW_CFA_advance_loc: 4 DW_CFA_AARCH64_negate_ra_state: DW_CFA_restore: W30 DW_CFA_advance_loc: 4 DW_CFA_restore_state: DW_CFA_advance_loc: 12 DW_CFA_def_cfa_offset: +0 DW_CFA_advance_loc: 4 DW_CFA_AARCH64_negate_ra_state: DW_CFA_restore: W30 DW_CFA_nop: 0x30: CFA=WSP 0x34: CFA=WSP: reg34=1 0x38: CFA=WSP+16: W30=[CFA-16], reg34=1 0x48: CFA=WSP: W30=[CFA-16], reg34=1 0x4c: CFA=WSP: reg34=1 <--- should be '=0' 0x50: CFA=WSP+16: W30=[CFA-16], reg34=1 0x5c: CFA=WSP: W30=[CFA-16], reg34=1 0x60: CFA=WSP: reg34=1 <--- should be '=0' ``` --- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 2 ++ ...sign-return-address-cfi-negate-ra-state.ll | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index aae1668c1639c4..0c968703f80b0d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -630,6 +630,8 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row, if (LRLoc->getLocation() == UnwindLocation::Constant) { // Toggle the constant value from 0 to 1 or 1 to 0. 
LRLoc->setConstant(LRLoc->getConstant() ^ 1); + Row.getRegisterLocations().setRegisterLocation( + AArch64DWARFPAuthRaState, *LRLoc); } else { return createStringError( errc::invalid_argument, diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll index da2c2985acf971..9464e3447993b3 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll @@ -213,6 +213,10 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state +; CHECK-DUMP: CFA=WSP{{$}} +; CHECK-DUMP: reg34=1 +; CHECK-DUMP-NOT: reg34=0 + ; baz_async ; CHECK-DUMP-LABEL: FDE ; CHECK-DUMP: Format: DWARF32 @@ -222,9 +226,24 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: DW_CFA_restore_state: ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: CFA=WSP{{$}} +;; First DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=1 +;; Second DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=0 +;; DW_CFA_restore_state: +; CHECK-DUMP: reg34=1 +;; Third DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=0 +; CHECK-DUMP-NOT: reg34= + ; baz_sync ; CHECK-DUMP-LABEL: FDE ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP-NOT: DW_CFA_AARCH64_negate_ra_state ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state + +; CHECK-DUMP: CFA=WSP{{$}} +; CHECK-DUMP: reg34=1 +; CHECK-DUMP-NOT: reg34=0 From fe8476472467acd15a4d3771313e5532d1eb032f Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Thu, 7 Mar 2024 16:34:36 -0800 Subject: [PATCH 132/158] [DWARF] Dump an updated location for DW_CFA_advance_loc* (#84274) When dumping FDEs, `readelf` prints new location values after `DW_CFA_advance_loc(*)` instructions, which looks quite convenient: ``` > readelf -wf test.o ... ... FDE ... 
pc=0000000000000030..0000000000000064 DW_CFA_advance_loc: 4 to 0000000000000034 ... DW_CFA_advance_loc: 4 to 0000000000000038 ... ``` This patch makes `llvm-dwarfdump` and `llvm-readobj` do the same. --- .../llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 6 ++-- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 19 +++++++---- .../dwarfdump-debug-frame-simple.test | 6 ++-- llvm/test/tools/llvm-readobj/ELF/unwind.test | 34 +++++++++---------- llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h | 7 ++-- 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index bc35f2ab988ed2..c7c558850a2805 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -454,8 +454,8 @@ class CFIProgram { /// where a problem occurred in case an error is returned. Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset); - void dump(raw_ostream &OS, DIDumpOptions DumpOpts, - unsigned IndentLevel = 1) const; + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, unsigned IndentLevel, + std::optional InitialLocation) const; void addInstruction(const Instruction &I) { Instructions.push_back(I); } @@ -524,7 +524,7 @@ class CFIProgram { /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, const Instruction &Instr, unsigned OperandIdx, - uint64_t Operand) const; + uint64_t Operand, std::optional &Address) const; }; /// An entry in either debug_frame or eh_frame. 
This entry can be a CIE or an diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 0c968703f80b0d..aff26824dda104 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -860,7 +860,8 @@ CFIProgram::getOperandTypes() { /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, const Instruction &Instr, unsigned OperandIdx, - uint64_t Operand) const { + uint64_t Operand, + std::optional &Address) const { assert(OperandIdx < MaxOperands); uint8_t Opcode = Instr.Opcode; OperandType Type = getOperandTypes()[Opcode][OperandIdx]; @@ -879,6 +880,7 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, break; case OT_Address: OS << format(" %" PRIx64, Operand); + Address = Operand; break; case OT_Offset: // The offsets are all encoded in a unsigned form, but in practice @@ -890,7 +892,11 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, if (CodeAlignmentFactor) OS << format(" %" PRId64, Operand * CodeAlignmentFactor); else - OS << format(" %" PRId64 "*code_alignment_factor" , Operand); + OS << format(" %" PRId64 "*code_alignment_factor", Operand); + if (Address && CodeAlignmentFactor) { + *Address += Operand * CodeAlignmentFactor; + OS << format(" to 0x%" PRIx64, *Address); + } break; case OT_SignedFactDataOffset: if (DataAlignmentFactor) @@ -920,13 +926,14 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, } void CFIProgram::dump(raw_ostream &OS, DIDumpOptions DumpOpts, - unsigned IndentLevel) const { + unsigned IndentLevel, + std::optional Address) const { for (const auto &Instr : Instructions) { uint8_t Opcode = Instr.Opcode; OS.indent(2 * IndentLevel); OS << callFrameString(Opcode) << ":"; for (unsigned i = 0; i < Instr.Ops.size(); ++i) - printOperand(OS, DumpOpts, Instr, i, Instr.Ops[i]); + 
printOperand(OS, DumpOpts, Instr, i, Instr.Ops[i], Address); OS << '\n'; } } @@ -977,7 +984,7 @@ void CIE::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << "\n"; } OS << "\n"; - CFIs.dump(OS, DumpOpts); + CFIs.dump(OS, DumpOpts, /*IndentLevel=*/1, /*InitialLocation=*/{}); OS << "\n"; if (Expected RowsOrErr = UnwindTable::create(this)) @@ -1005,7 +1012,7 @@ void FDE::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << " Format: " << FormatString(IsDWARF64) << "\n"; if (LSDAAddress) OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); - CFIs.dump(OS, DumpOpts); + CFIs.dump(OS, DumpOpts, /*IndentLevel=*/1, InitialLocation); OS << "\n"; if (Expected RowsOrErr = UnwindTable::create(this)) diff --git a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test index 6c049af43efe74..2cd281c8d0af9f 100644 --- a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test +++ b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test @@ -12,15 +12,15 @@ ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES: 00000014 00000010 00000000 FDE cie=00000000 pc=00000000...00000022 -; FRAMES: DW_CFA_advance_loc: 3 +; FRAMES: DW_CFA_advance_loc: 3 to 0x3 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +12 ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080 -; FRAMES: DW_CFA_advance_loc: 1 +; FRAMES: DW_CFA_advance_loc: 1 to 0x31 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +8 ; FRAMES-NEXT: DW_CFA_offset: {{reg5|EBP}} -8 -; FRAMES-NEXT: DW_CFA_advance_loc: 2 +; FRAMES-NEXT: DW_CFA_advance_loc: 2 to 0x33 ; FRAMES-NEXT: DW_CFA_def_cfa_register: {{reg5|EBP}} ; FRAMES-NOT: CIE diff --git a/llvm/test/tools/llvm-readobj/ELF/unwind.test b/llvm/test/tools/llvm-readobj/ELF/unwind.test index 2deb1a587d2438..2e51ec2a61a637 100644 --- a/llvm/test/tools/llvm-readobj/ELF/unwind.test +++ b/llvm/test/tools/llvm-readobj/ELF/unwind.test @@ -96,9 +96,9 @@ # CHECK: Program: # CHECK-NEXT: DW_CFA_def_cfa_offset: +16 -# 
CHECK-NEXT: DW_CFA_advance_loc: 6 +# CHECK-NEXT: DW_CFA_advance_loc: 6 to 0x4004a6 # CHECK-NEXT: DW_CFA_def_cfa_offset: +24 -# CHECK-NEXT: DW_CFA_advance_loc: 10 +# CHECK-NEXT: DW_CFA_advance_loc: 10 to 0x4004b0 # CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg7 +8, DW_OP_breg16 +0, DW_OP_lit15, DW_OP_and, DW_OP_lit11, DW_OP_ge, DW_OP_lit3, DW_OP_shl, DW_OP_plus # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -110,12 +110,12 @@ # CHECK-NEXT: address_range: 0x10 (end : 0x4005c6) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 1 +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x4005b7 # CHECK-NEXT: DW_CFA_def_cfa_offset: +16 # CHECK-NEXT: DW_CFA_offset: reg6 -16 -# CHECK-NEXT: DW_CFA_advance_loc: 3 +# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x4005ba # CHECK-NEXT: DW_CFA_def_cfa_register: reg6 -# CHECK-NEXT: DW_CFA_advance_loc: 11 +# CHECK-NEXT: DW_CFA_advance_loc: 11 to 0x4005c5 # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -126,15 +126,15 @@ # CHECK-NEXT: address_range: 0xc7f (end : 0x40124f) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005d5 # CHECK-NEXT: DW_CFA_def_cfa: reg10 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 9 +# CHECK-NEXT: DW_CFA_advance_loc: 9 to 0x4005de # CHECK-NEXT: DW_CFA_expression: reg6 DW_OP_breg6 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005e3 # CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg6 -8, DW_OP_deref -# CHECK-NEXT: DW_CFA_advance_loc2: 3174 +# CHECK-NEXT: DW_CFA_advance_loc2: 3174 to 0x401249 # CHECK-NEXT: DW_CFA_def_cfa: reg10 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40124e # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -146,21 +146,21 @@ # CHECK-NEXT: address_range: 0x66 (end : 0x4012b6) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 1 +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x401251 # CHECK-NEXT: 
DW_CFA_def_cfa_offset: +16 # CHECK-NEXT: DW_CFA_offset: reg6 -16 -# CHECK-NEXT: DW_CFA_advance_loc: 3 +# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x401254 # CHECK-NEXT: DW_CFA_def_cfa_register: reg6 -# CHECK-NEXT: DW_CFA_advance_loc: 2 +# CHECK-NEXT: DW_CFA_advance_loc: 2 to 0x401256 # CHECK-NEXT: DW_CFA_offset: reg15 -24 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40125b # CHECK-NEXT: DW_CFA_offset: reg14 -32 -# CHECK-NEXT: DW_CFA_advance_loc: 7 +# CHECK-NEXT: DW_CFA_advance_loc: 7 to 0x401262 # CHECK-NEXT: DW_CFA_offset: reg13 -40 # CHECK-NEXT: DW_CFA_offset: reg12 -48 -# CHECK-NEXT: DW_CFA_advance_loc: 8 +# CHECK-NEXT: DW_CFA_advance_loc: 8 to 0x40126a # CHECK-NEXT: DW_CFA_offset: reg3 -56 -# CHECK-NEXT: DW_CFA_advance_loc1: 75 +# CHECK-NEXT: DW_CFA_advance_loc1: 75 to 0x4012b5 # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: diff --git a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h index 687d97abd0232d..2e89463e68d519 100644 --- a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h +++ b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h @@ -196,6 +196,7 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { reportError(std::move(E), ObjF.getFileName()); for (const dwarf::FrameEntry &Entry : EHFrame) { + std::optional InitialLocation; if (const dwarf::CIE *CIE = dyn_cast(&Entry)) { W.startLine() << format("[0x%" PRIx64 "] CIE length=%" PRIu64 "\n", Address + CIE->getOffset(), CIE->getLength()); @@ -214,8 +215,9 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { Address + FDE->getLinkedCIE()->getOffset()); W.indent(); + InitialLocation = FDE->getInitialLocation(); W.startLine() << format("initial_location: 0x%" PRIx64 "\n", - FDE->getInitialLocation()); + *InitialLocation); W.startLine() << format( "address_range: 0x%" PRIx64 " (end : 0x%" PRIx64 ")\n", FDE->getAddressRange(), @@ -227,7 +229,8 @@ void 
PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { W.indent(); auto DumpOpts = DIDumpOptions(); DumpOpts.IsEH = true; - Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel()); + Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel(), + InitialLocation); W.unindent(); W.unindent(); W.getOStream() << "\n"; From 99512b1728bcf47dbf28f8a4cf5d296109fb0630 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 7 Mar 2024 16:37:41 -0800 Subject: [PATCH 133/158] [Object] Convert tests to opaque pointers (NFC) Link: https://discourse.llvm.org/t/enabling-opaque-pointers-by-default/61322 --- llvm/test/Object/Inputs/small.ll | 10 +++++----- llvm/test/Object/Inputs/trivial.ll | 10 +++++----- llvm/test/Object/X86/irsymtab-bad-alias.ll | 4 ++-- llvm/test/Object/X86/nm-ir.ll | 10 +++++----- llvm/test/Object/dllimport-globalref.ll | 2 +- llvm/test/Object/dllimport.ll | 2 +- llvm/test/Object/mangle-ir.ll | 4 ++-- llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll | 8 ++++---- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/test/Object/Inputs/small.ll b/llvm/test/Object/Inputs/small.ll index ef68a8c324a32f..677f20ade4c5bf 100644 --- a/llvm/test/Object/Inputs/small.ll +++ b/llvm/test/Object/Inputs/small.ll @@ -4,15 +4,15 @@ target triple = "i386-pc-windows" define i32 @main() nounwind { entry: - %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind - tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind + %call = tail call i32 @puts(ptr @.str) nounwind + tail call void @SomeOtherFunction() nounwind ret i32 0 } -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind declare void @SomeOtherFunction(...) 
@var = global i32 0 -@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata" -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }] +@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }] diff --git a/llvm/test/Object/Inputs/trivial.ll b/llvm/test/Object/Inputs/trivial.ll index 82eabc6389fb84..1a6a76298b23ee 100644 --- a/llvm/test/Object/Inputs/trivial.ll +++ b/llvm/test/Object/Inputs/trivial.ll @@ -5,15 +5,15 @@ define i32 @main() nounwind { entry: - %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind - tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind + %call = tail call i32 @puts(ptr @.str) nounwind + tail call void @SomeOtherFunction() nounwind ret i32 0 } -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind declare void @SomeOtherFunction(...) 
@var = global i32 0 -@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata" -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }] +@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }] diff --git a/llvm/test/Object/X86/irsymtab-bad-alias.ll b/llvm/test/Object/X86/irsymtab-bad-alias.ll index c54436d5921929..7f204d1dd15730 100644 --- a/llvm/test/Object/X86/irsymtab-bad-alias.ll +++ b/llvm/test/Object/X86/irsymtab-bad-alias.ll @@ -11,5 +11,5 @@ target triple = "x86_64-unknown-linux-gnu" @g1 = global i32 1 @g2 = global i32 2 -@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), - i32 ptrtoint (i32* @g2 to i32)) to i32*) +@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (ptr @g1 to i32), + i32 ptrtoint (ptr @g2 to i32)) to ptr) diff --git a/llvm/test/Object/X86/nm-ir.ll b/llvm/test/Object/X86/nm-ir.ll index e57c6d9a11c6e4..0324efb2948d17 100644 --- a/llvm/test/Object/X86/nm-ir.ll +++ b/llvm/test/Object/X86/nm-ir.ll @@ -29,15 +29,15 @@ module asm ".long undef_asm_sym" @g3 = common global i32 0 @g4 = private global i32 42 -@a1 = alias i32, i32* @g1 -@a2 = internal alias i32, i32* @g1 +@a1 = alias i32, ptr @g1 +@a2 = internal alias i32, ptr @g1 -define void ()* @f1() { +define ptr @f1() { call void @f5() - ret void ()* null + ret ptr null } -@ifunc_f1 = ifunc void (), void ()* ()* @f1 +@ifunc_f1 = ifunc void (), ptr @f1 define internal void @f2() { ret void diff --git a/llvm/test/Object/dllimport-globalref.ll b/llvm/test/Object/dllimport-globalref.ll index dd518bc2266cac..0a95be20a9d175 100644 --- a/llvm/test/Object/dllimport-globalref.ll +++ b/llvm/test/Object/dllimport-globalref.ll @@ -11,4 +11,4 @@ target triple = "x86_64-pc-windows-msvc" ; CHECK: U f declare dllimport void @f() -@fp = constant void ()* 
@f +@fp = constant ptr @f diff --git a/llvm/test/Object/dllimport.ll b/llvm/test/Object/dllimport.ll index afdb4562cc9fb8..52f583fa2487e7 100644 --- a/llvm/test/Object/dllimport.ll +++ b/llvm/test/Object/dllimport.ll @@ -12,6 +12,6 @@ declare dllimport void @f() define void @g() { call void @f() - store i32 42, i32* @v + store i32 42, ptr @v ret void } diff --git a/llvm/test/Object/mangle-ir.ll b/llvm/test/Object/mangle-ir.ll index bd7c3d93b7c9a2..76442f070385ec 100644 --- a/llvm/test/Object/mangle-ir.ll +++ b/llvm/test/Object/mangle-ir.ll @@ -7,8 +7,8 @@ target datalayout = "m:o" ; CHECK-NOT: memcpy define void @f() { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* null, i64 0, i1 false) + tail call void @llvm.memcpy.p0.p0.i64(ptr null, ptr null, i64 0, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) diff --git a/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll b/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll index d2518f46cc27a8..c506c9687ec2a4 100644 --- a/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll +++ b/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll @@ -5,11 +5,11 @@ target triple = "x86_64-apple-macosx10.15.0" -@llvm.used = appending global [1 x i8*] [i8* bitcast (i16* @__swift_reflection_version to i8*)], section "llvm.metadata", align 8 +@llvm.used = appending global [1 x ptr] [ptr @__swift_reflection_version], section "llvm.metadata", align 8 @__swift_reflection_version = linkonce_odr hidden constant i16 3 -define i32 @main(i32 %0, i8** %1) #0 { - %3 = bitcast i8** %1 to i8* +define i32 @main(i32 %0, ptr %1) #0 { + %3 = bitcast ptr %1 to ptr ret i32 0 } @@ -25,7 +25,7 @@ attributes #0 = { "frame-pointer"="all" "target-cpu"="penryn" "target-features"= !1 = !{!"-lswiftSwiftOnoneSupport"} !2 = !{!"-lswiftCore"} !3 = !{!"-lobjc"} -!4 = !{[1 x i8*]* @llvm.used, null, null, i1 false, 
i1 true} +!4 = !{ptr @llvm.used, null, null, i1 false, i1 true} !5 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 15]} !6 = !{i32 1, !"Objective-C Version", i32 2} !7 = !{i32 1, !"Objective-C Image Info Version", i32 0} From 30f098ef9dfb121bccaef6975b13788b6f940e47 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 7 Mar 2024 16:39:57 -0800 Subject: [PATCH 134/158] [Instrumentation] Convert tests to opaque pointers (NFC) Link: https://discourse.llvm.org/t/enabling-opaque-pointers-by-default/61322 --- .../Instrumentation/AddressSanitizer/aarch64be.ll | 4 ++-- .../AddressSanitizer/program-addrspace.ll | 2 +- .../InstrProfiling/before-value-profile-lowering.ll | 12 ++++++------ .../InstrProfiling/timestamp-coverage.ll | 8 ++++---- .../test/Instrumentation/InstrProfiling/timestamp.ll | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll b/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll index eb522a0f3f3173..aeb1b0e8ebe778 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -passes=asan -S -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefix=CHECK-AARCH64BE %s ; REQUIRES: aarch64-registered-target -define i32 @read_4_bytes(i32* %a) sanitize_address { +define i32 @read_4_bytes(ptr %a) sanitize_address { entry: - %tmp1 = load i32, i32* %a, align 4 + %tmp1 = load i32, ptr %a, align 4 ret i32 %tmp1 } diff --git a/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll b/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll index adfe21135e7ada..1d5bfb09ead97c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll @@ -16,7 +16,7 @@ target datalayout = "P1" define i1 @b(i64 %c) addrspace(1) { %cast = inttoptr i64 %c to ptr addrspace(42) - %cmp = icmp ugt ptr 
addrspace(42) %cast, getelementptr inbounds ([1 x i32], ptr addrspace(42) @a, i64 0, i64 0) + %cmp = icmp ugt ptr addrspace(42) %cast, @a ret i1 %cmp } diff --git a/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll b/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll index 5dfec433f4ecb3..870e74ccfdac46 100644 --- a/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll +++ b/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll @@ -7,17 +7,17 @@ target triple = "x86_64-unknown-linux-gnu" -declare void @llvm.instrprof.increment.step(i8*, i64, i32, i32, i64) +declare void @llvm.instrprof.increment.step(ptr, i64, i32, i32, i64) -declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32) +declare void @llvm.instrprof.value.profile(ptr, i64, i64, i32, i32) ; CHECK: @__profd_foo = private global @__profn_foo = private constant [3 x i8] c"foo" -define i32 @foo(i32 ()* ) { - %2 = ptrtoint i32 ()* %0 to i64 - call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i64 %2, i32 0, i32 0) - call void @llvm.instrprof.increment.step(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0, i64 0) +define i32 @foo(ptr ) { + %2 = ptrtoint ptr %0 to i64 + call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 0, i64 %2, i32 0, i32 0) + call void @llvm.instrprof.increment.step(ptr @__profn_foo, i64 0, i32 1, i32 0, i64 0) %3 = tail call i32 %0() ret i32 %3 } diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll index ab9b664a2cff65..d40cc2ac02c1bc 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global 
[9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) + call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 9, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) + call void @llvm.instrprof.cover(ptr @__profn_foo, i64 12345678, i32 9, i32 8) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.cover(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll index aa2393695d6b85..c08ba4485fc5dd 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [2 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) + call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 2, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) + call void @llvm.instrprof.increment(ptr @__profn_foo, i64 12345678, i32 2, i32 1) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.increment(i8*, i64, i32, i32) 
+declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.increment(ptr, i64, i32, i32) From 9d3bf9b639eafeded82c6be295031262735d1dac Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 7 Mar 2024 16:41:50 -0800 Subject: [PATCH 135/158] [NFC] [hwasan] consistent naming for cl::opt --- .../Instrumentation/HWAddressSanitizer.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 289183ecf0f286..6bae679e11be23 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -187,15 +187,15 @@ static cl::opt cl::desc("Use selective instrumentation"), cl::Hidden, cl::init(false)); -static cl::opt HotPercentileCutoff( +static cl::opt ClHotPercentileCutoff( "hwasan-percentile-cutoff-hot", cl::init(0), cl::desc("Alternative hot percentile cuttoff." "By default `-profile-summary-cutoff-hot` is used.")); static cl::opt - RandomSkipRate("hwasan-random-skip-rate", cl::init(0), - cl::desc("Probability value in the range [0.0, 1.0] " - "to skip instrumentation of a function.")); + ClRandomSkipRate("hwasan-random-skip-rate", cl::init(0), + cl::desc("Probability value in the range [0.0, 1.0] " + "to skip instrumentation of a function.")); STATISTIC(NumTotalFuncs, "Number of total funcs"); STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs"); @@ -301,7 +301,7 @@ class HWAddressSanitizer { ? ClEnableKhwasan : CompileKernel; this->Rng = - RandomSkipRate.getNumOccurrences() ? M.createRNG("hwasan") : nullptr; + ClRandomSkipRate.getNumOccurrences() ? 
M.createRNG("hwasan") : nullptr; initializeModule(); } @@ -1537,8 +1537,8 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, NumTotalFuncs++; if (CSelectiveInstrumentation) { - if (RandomSkipRate.getNumOccurrences()) { - std::bernoulli_distribution D(RandomSkipRate); + if (ClRandomSkipRate.getNumOccurrences()) { + std::bernoulli_distribution D(ClRandomSkipRate); if (D(*Rng)) return; } else { @@ -1547,10 +1547,10 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, MAMProxy.getCachedResult(*F.getParent()); if (PSI && PSI->hasProfileSummary()) { auto &BFI = FAM.getResult(F); - if ((HotPercentileCutoff.getNumOccurrences() && - HotPercentileCutoff >= 0) + if ((ClHotPercentileCutoff.getNumOccurrences() && + ClHotPercentileCutoff >= 0) ? PSI->isFunctionHotInCallGraphNthPercentile( - HotPercentileCutoff, &F, BFI) + ClHotPercentileCutoff, &F, BFI) : PSI->isFunctionHotInCallGraph(&F, BFI)) return; } else { From fb582b6ace781ff6991775d6dcd4df98aa16698f Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 7 Mar 2024 17:05:44 -0800 Subject: [PATCH 136/158] [mlir] Implement Mesh's ShardingInterface for Linalg ops (#82284) Allows linalg structured operations to be handled during spmdization and sharding propagation. There is only support for projected permutation indexing maps. 
--- .../Dialect/Linalg/Transforms/AllInterfaces.h | 26 ++ .../Transforms/MeshShardingInterfaceImpl.h | 20 + mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td | 6 + mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td | 4 + .../Mesh/Interfaces/ShardingInterfaceImpl.h | 18 + .../mlir/Dialect/Mesh/Transforms/Transforms.h | 6 + mlir/include/mlir/IR/Dialect.h | 8 + mlir/include/mlir/InitAllDialects.h | 10 +- mlir/lib/Dialect/Linalg/IR/CMakeLists.txt | 1 + mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp | 7 + .../Linalg/Transforms/AllInterfaces.cpp | 24 ++ .../Dialect/Linalg/Transforms/CMakeLists.txt | 5 + .../Transforms/MeshShardingInterfaceImpl.cpp | 352 ++++++++++++++++++ .../Linalg/Transforms/TilingInterfaceImpl.cpp | 8 - mlir/lib/Dialect/Mesh/IR/MeshOps.cpp | 7 + .../Mesh/Interfaces/ShardingInterface.cpp | 89 ++++- .../Dialect/Mesh/Transforms/Transforms.cpp | 13 + .../test/Dialect/Linalg/mesh-spmdization.mlir | 165 ++++++++ .../llvm-project-overlay/mlir/BUILD.bazel | 4 + 19 files changed, 754 insertions(+), 19 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h create mode 100644 mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h create mode 100644 mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp create mode 100644 mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp create mode 100644 mlir/test/Dialect/Linalg/mesh-spmdization.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h b/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h new file mode 100644 index 00000000000000..a69751e072b797 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h @@ -0,0 +1,26 @@ +//===- AllInterfaces.h - ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a common entry point for registering all external +// interface implementations to the linalg dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H +#define MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerAllDialectInterfaceImplementations(DialectRegistry ®istry); +} // namespace linalg + +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h new file mode 100644 index 00000000000000..c57501ea86b7ed --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h @@ -0,0 +1,20 @@ +//===- MeshShardingInterfaceImpl.h ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H +#define MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerMeshShardingInterfaceExternalModels(DialectRegistry ®istry); +} // namespace linalg +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td b/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td index fc2acc70381ef7..9d9b5892e1a51f 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td @@ -46,6 +46,12 @@ def Mesh_ReductionKind : I32EnumAttr<"ReductionKind", I32EnumAttrCase<"Sum", 1, "sum">, I32EnumAttrCase<"Max", 2, "max">, I32EnumAttrCase<"Min", 3, "min">, + I32EnumAttrCase<"Product", 4, "product">, + // Arithmetic mean. 
+ I32EnumAttrCase<"Average", 5, "average">, + I32EnumAttrCase<"BitwiseAnd", 6, "bitwise_and">, + I32EnumAttrCase<"BitwiseOr", 7, "bitwise_or">, + I32EnumAttrCase<"BitwiseXor", 8, "bitwise_xor">, I32EnumAttrCase<"Generic", 100, "generic"> ]> { let genSpecializedAttr = 0; diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td index b9cd15e2062669..8e1e475463585e 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td @@ -353,6 +353,10 @@ def Mesh_AllReduceOp : Mesh_CollectiveCommunicationOpBase<"all_reduce", [ attr-dict `:` type($input) `->` type($result) }]; let hasCanonicalizer = 1; + let builders = [ + OpBuilder<(ins "Value":$input, "StringRef":$mesh, + "ArrayRef":$meshAxes, "ReductionKind":$reduction)> + ]; } def Mesh_AllSliceOp : Mesh_CollectiveCommunicationOpBase<"all_slice", [ diff --git a/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h b/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h index ffc9b6fb18be53..ab4df2ab028d43 100644 --- a/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h @@ -22,6 +22,24 @@ class SymbolTableCollection; namespace mesh { +// Retrieve the mesh axes corresponding to each operation loop iterator based +// on the provided shardings for the op's operands and results. +// Assumes that the indexingMaps are projected permutations. +ShardingArray getMeshAxisAssignmentForLoopIterators( + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef indexingMaps); + +bool isAtLeastOneReductionIteratorSharded( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators); + +// Get the set of mesh axes that correspond to reduction loop iterators. 
+SmallVector getReductionMeshAxes( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators); + // Inserts a clone of the operation that has all ranked tensor // arguments/results sharded. void spmdizeTriviallyShardableOperation( diff --git a/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h index aeab28961a4e1e..be82e2af399dc8 100644 --- a/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h @@ -13,6 +13,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Value.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/ArrayRef.h" namespace mlir { class RewritePatternSet; @@ -37,6 +38,11 @@ TypedValue createCollectiveProcessGroupSize(MeshOp mesh, ArrayRef axes, ImplicitLocOpBuilder &builder); +// Get process linear index along the given mesh axes. +TypedValue createProcessLinearIndex(StringRef mesh, + ArrayRef meshAxes, + ImplicitLocOpBuilder &builder); + } // namespace mesh } // namespace mlir diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 50f6f6de5c2897..6c8a170a03c72d 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -216,6 +216,14 @@ class Dialect { {TypeID::get(), InterfaceT::getInterfaceID()}); } + // Declare the same interface for multiple types. + // Example: + // declarePromisedInterfaces() + template + void declarePromisedInterfaces() { + (declarePromisedInterface(), ...); + } + /// Checks if the given interface, which is attempting to be used, is a /// promised interface of this dialect that has yet to be implemented. If so, /// emits a fatal error. 
`interfaceName` is an optional string that contains a diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index 838bd03622a626..21775e11e07149 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -43,10 +43,7 @@ #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/IR/ValueBoundsOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/SubsetInsertionOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" #include "mlir/Dialect/MLProgram/IR/MLProgram.h" #include "mlir/Dialect/MLProgram/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/MPI/IR/MPI.h" @@ -157,10 +154,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { cf::registerBufferizableOpInterfaceExternalModels(registry); cf::registerBufferDeallocationOpInterfaceExternalModels(registry); gpu::registerBufferDeallocationOpInterfaceExternalModels(registry); - linalg::registerBufferizableOpInterfaceExternalModels(registry); - linalg::registerSubsetOpInterfaceExternalModels(registry); - linalg::registerTilingInterfaceExternalModels(registry); - linalg::registerValueBoundsOpInterfaceExternalModels(registry); + linalg::registerAllDialectInterfaceImplementations(registry); memref::registerAllocationOpInterfaceExternalModels(registry); memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry); memref::registerValueBoundsOpInterfaceExternalModels(registry); diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt index f0ac1899bb02ab..c187563b8f0c4e 100644 --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -25,6 +25,7 @@ 
add_mlir_dialect_library(MLIRLinalgDialect MLIRInferTypeOpInterface MLIRIR MLIRParser + MLIRShardingInterface MLIRSideEffectInterfaces MLIRSparseTensorDialect MLIRSCFDialect diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp index 5069d43e7db95f..027058d4de6328 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" @@ -118,6 +119,12 @@ void mlir::linalg::LinalgDialect::initialize() { >(namedStructuredOpRegionBuilders); addInterfaces(); + + declarePromisedInterface(); + declarePromisedInterfaces(); } LogicalResult LinalgDialect::verifyOperationAttribute(Operation *op, diff --git a/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp b/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp new file mode 100644 index 00000000000000..281d9f2204486b --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp @@ -0,0 +1,24 @@ +//===- AllInterfaces.cpp - ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" + +#include "mlir/Dialect/Linalg/IR/ValueBoundsOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/SubsetInsertionOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" + +void mlir::linalg::registerAllDialectInterfaceImplementations( + DialectRegistry ®istry) { + registerBufferizableOpInterfaceExternalModels(registry); + registerMeshShardingInterfaceExternalModels(registry); + registerSubsetOpInterfaceExternalModels(registry); + registerTilingInterfaceExternalModels(registry); + registerValueBoundsOpInterfaceExternalModels(registry); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 4f47e3b8718454..513c54de5d7bfc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRLinalgTransforms + AllInterfaces.cpp BubbleUpExtractSlice.cpp BufferizableOpInterfaceImpl.cpp Bufferize.cpp @@ -21,6 +22,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms InlineScalarOperands.cpp Interchange.cpp Loops.cpp + MeshShardingInterfaceImpl.cpp NamedOpConversions.cpp Padding.cpp Promotion.cpp @@ -61,12 +63,15 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRIR MLIRMemRefDialect MLIRMemRefTransforms + MLIRMeshDialect + MLIRMeshTransforms MLIRLinalgDialect MLIRLinalgUtils MLIRSCFDialect MLIRSCFTransforms MLIRSCFUtils MLIRPass + MLIRShardingInterface MLIRSubsetOpInterface MLIRSparseTensorDialect MLIRTensorDialect diff --git a/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp new file mode 100644 index 00000000000000..7ac45dc3eb3efc --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp @@ -0,0 +1,352 @@ +//===- MeshShardingInterfaceImpl.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h" + +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" +#include "mlir/Dialect/Mesh/IR/MeshOps.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h" +#include "mlir/Dialect/Mesh/Transforms/Transforms.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/TilingInterface.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" +#include +#include +#include + +namespace mlir::linalg { + +using MeshAxis = mesh::MeshAxis; +using ReductionKind = mesh::ReductionKind; +using MeshShardingAttr = mesh::MeshShardingAttr; +using 
ShardingArray = mesh::ShardingArray; +using MeshOp = mesh::MeshOp; + +// Returns the corresponding mesh reduction kind for the given arith op. +static ReductionKind getReductionKind(Operation *op) { + return llvm::TypeSwitch(op) + // Floating-point operations. + .Case([](arith::AddFOp op) { return ReductionKind::Sum; }) + .Case([](arith::MulFOp op) { return ReductionKind::Product; }) + // TODO: handle maxnumf and minnumf. + .Case([](arith::MaximumFOp op) { return ReductionKind::Max; }) + .Case([](arith::MinimumFOp op) { return ReductionKind::Min; }) + // Integer operations. + .Case([](arith::AddIOp op) { return ReductionKind::Sum; }) + .Case([](arith::OrIOp op) { return ReductionKind::BitwiseOr; }) + .Case([](arith::XOrIOp op) { return ReductionKind::BitwiseXor; }) + .Case([](arith::AndIOp op) { return ReductionKind::Sum; }) + // TODO: handle signless, signed and unsigned types properly. + // It is assumed that the element type of the collective operands and + // result drive the meaning of the reduction kind, whether it is signed + // or unsigned. + // The reduction op inside the linalg op may have different result type + // from the element type of the linalg op's result. + // Also signed and unsigned Arith dialect ops may accept signed, unsigned + // or signless operands. + // Maybe expand the reduction kinds. 
+ .Case([](arith::MaxUIOp op) { return ReductionKind::Max; }) + .Case([](arith::MinUIOp op) { return ReductionKind::Min; }) + .Case([](arith::MaxSIOp op) { return ReductionKind::Max; }) + .Case([](arith::MinSIOp op) { return ReductionKind::Min; }) + .Case([](arith::MulIOp op) { return ReductionKind::Product; }) + .Default([](Operation *op) { return ReductionKind::Generic; }); +} + +static std::optional getCombinerOp(LinalgOp op) { + SmallVector combinerOps; + Value reducedValue = matchReduction(op.getRegionOutputArgs(), 0, combinerOps); + if (!reducedValue || combinerOps.size() != 1) { + return std::nullopt; + } + + return combinerOps[0]; +} + +static ReductionKind getReductionKindOfLinalgOp(LinalgOp op) { + std::optional reductionOp = getCombinerOp(op); + if (!reductionOp) { + return ReductionKind::Generic; + } + Type resultElementType = + llvm::cast(op->getResult(0).getType()).getElementType(); + // TODO: handle case when result type of the reduction op does not match the + // element type of the result tensor. + // Would it makes sense at all? + assert(resultElementType == reductionOp.value()->getResult(0).getType()); + return getReductionKind(reductionOp.value()); +} + +static MeshOp getMesh(Operation *op, + ArrayRef operandShardings, + ArrayRef resultShardings, + SymbolTableCollection &symbolTable) { + for (MeshShardingAttr sharding : operandShardings) { + if (sharding) { + return mesh::getMesh(op, sharding.getMesh(), symbolTable); + } + } + + for (MeshShardingAttr sharding : resultShardings) { + if (sharding) { + return mesh::getMesh(op, sharding.getMesh(), symbolTable); + } + } + + assert(false); +} + +// Choose the operand based on the current process index along the reduction +// mesh axes. +// We need to use the initial value only once to avoid including it in the +// reduction multiple times. +// In each process group only the leading process with linear index 0 would use +// the original operand. 
+// The other processes would use the reduction operation neutral tensor. +static Value createDestinationPassingStyleInitOperand( + LinalgOp op, Value spmdizedOperand, ArrayRef reductionMeshAxes, + MeshOp meshOp, ImplicitLocOpBuilder &builder) { + Value processLinearIndexInReductionGroup = mesh::createProcessLinearIndex( + meshOp.getSymName(), reductionMeshAxes, builder); + Value zero = builder.create(0); + Value isLeadProcess = builder.create( + builder.getI1Type(), arith::CmpIPredicate::eq, + processLinearIndexInReductionGroup, zero); + scf::IfOp ifOp = builder.create(spmdizedOperand.getType(), + isLeadProcess, true, true); + // Then block. + { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointToEnd(&ifOp.getThenRegion().front()); + builder.create(spmdizedOperand); + } + + // Else block. + { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointToEnd(&ifOp.getElseRegion().front()); + SmallVector shape = + tensor::getMixedSizes(builder, builder.getLoc(), spmdizedOperand); + PartialReductionOpInterface partialReductionIface = + llvm::cast(op.getOperation()); + FailureOr reductionNeutralTensorOp = + partialReductionIface.generateInitialTensorForPartialReduction( + builder, builder.getLoc(), shape, {}); + assert(succeeded(reductionNeutralTensorOp)); + builder.create( + reductionNeutralTensorOp.value()->getResult(0)); + } + return ifOp.getResult(0); +} + +// Create the DPS init operands for the spmdized Linalg op. +// Return all the new spmdized operands. +static SmallVector createDestinationPassingStyleInitOperands( + LinalgOp op, MeshOp meshOp, ArrayRef spmdizedOperands, + ArrayRef reductionMeshAxes, IRMapping &spmdizationMap, + ImplicitLocOpBuilder &builder) { + // TODO: add support for multiple destination passing style initial value + // operands. + // PartialReductionOpInterface::generateInitialTensorForPartialReduction + // needs to also support multiple DPS initial operands. 
+ SmallVector newOperands = llvm::to_vector(spmdizedOperands); + auto operandIdx = op.getDpsInitOperand(0)->getOperandNumber(); + Value spmdizedInitOperand = + spmdizationMap.lookup(op->getOperands()[operandIdx]); + newOperands[operandIdx] = createDestinationPassingStyleInitOperand( + op, spmdizedInitOperand, reductionMeshAxes, meshOp, builder); + return newOperands; +} + +static void createAllReduceForResultWithoutPartialSharding( + Value unshardedLinalgOpResult, ArrayRef opReductionMeshAxes, + MeshShardingAttr resultSharding, ReductionKind reductionKind, + IRMapping &spmdizationMap, ImplicitLocOpBuilder &builder) { + SmallVector allReduceMeshAxes; + llvm::copy_if(opReductionMeshAxes, std::back_inserter(allReduceMeshAxes), + [&resultSharding](MeshAxis axis) { + return !llvm::is_contained(resultSharding.getPartialAxes(), + axis); + }); + if (allReduceMeshAxes.empty()) { + return; + } + + Value spmdizedLinalgOpResult = spmdizationMap.lookup(unshardedLinalgOpResult); + Value reducedValue = builder.create( + spmdizedLinalgOpResult, resultSharding.getMesh().getValue(), + allReduceMeshAxes, reductionKind); + spmdizationMap.map(unshardedLinalgOpResult, reducedValue); +} + +static void createAllReduceForResultsWithoutPartialShardings( + LinalgOp unshardedOp, ArrayRef opReductionMeshAxes, + ArrayRef resultShardings, IRMapping &spmdizationMap, + ImplicitLocOpBuilder &builder) { + ReductionKind reductionKind = getReductionKindOfLinalgOp(unshardedOp); + for (auto [unshardedLinalgOpResult, resultSharding] : + llvm::zip_equal(unshardedOp->getResults(), resultShardings)) { + createAllReduceForResultWithoutPartialSharding( + unshardedLinalgOpResult, opReductionMeshAxes, resultSharding, + reductionKind, spmdizationMap, builder); + } +} + +static void spmdizeLinalgOpWithShardedReduction( + LinalgOp op, ArrayRef spmdizedOperands, + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators, + IRMapping 
&spmdizationMap, SymbolTableCollection &symbolTable, + ImplicitLocOpBuilder &builder) { + MeshOp mesh = getMesh(op, operandShardings, resultShardings, symbolTable); + SmallVector reductionMeshAxes = mesh::getReductionMeshAxes( + loopIteratorTypes, meshAxisAssignmentForLoopIterators); + SmallVector spmdizedLinalgOpOperands = + createDestinationPassingStyleInitOperands(op, mesh, spmdizedOperands, + reductionMeshAxes, + spmdizationMap, builder); + // We must not change the operand mappings of the original spmdizationMap as + // they are the mappings for the whole spmdization blob and may be used by + // others. + IRMapping internalSpmdizationMap; + for (auto [unshardedOperand, spmdizedOperand] : + llvm::zip_equal(op->getOperands(), spmdizedLinalgOpOperands)) { + internalSpmdizationMap.map(unshardedOperand, spmdizedOperand); + } + spmdizeTriviallyShardableOperation( + *op, spmdizedLinalgOpOperands, operandShardings, resultShardings, + internalSpmdizationMap, symbolTable, builder); + for (Value result : op->getResults()) { + spmdizationMap.map(result, internalSpmdizationMap.lookup(result)); + } + + // Handle partial shardings. + createAllReduceForResultsWithoutPartialShardings( + op, reductionMeshAxes, resultShardings, spmdizationMap, builder); +} + +namespace { + +// ShardingInterface for ops that implement LinalgStructuredInterface. +// The supported ops are only those where the indexing maps are projected +// permutations. +template +struct StructuredOpShardingInterface + : public mesh::ShardingInterface::ExternalModel< + StructuredOpShardingInterface, Op> { + SmallVector getLoopIteratorTypes(Operation *op) const { + return llvm::cast(op).getIteratorTypesArray(); + } + + SmallVector getIndexingMaps(Operation *op) const { + LinalgOp linalgOp = llvm::cast(op); + SmallVector res = linalgOp.getIndexingMapsArray(); + + // Results must have the same indexing as destination passing style initial + // operands. 
+ for (int64_t i = 0; i < linalgOp.getNumDpsInits(); ++i) { + res.push_back(res[linalgOp.getDpsInitOperand(i)->getOperandNumber()]); + } + + return res; + } + + LogicalResult spmdize(Operation *op, ArrayRef spmdizedOperands, + ArrayRef operandShardings, + ArrayRef resultShardings, + IRMapping &spmdizationMap, + SymbolTableCollection &symbolTable, + OpBuilder &builder) const { + LinalgOp linalgOp = llvm::cast(op); + + SmallVector indexingMaps = linalgOp.getIndexingMapsArray(); + bool allIndexingMapsAreProjectedPermutation = + llvm::all_of(indexingMaps, [](AffineMap map) { + return map.isProjectedPermutation(); + }); + if (!allIndexingMapsAreProjectedPermutation) { + // TODO: handle non-projected permutations. + return op->emitOpError() + << "supports indexing maps that are only projected permutation."; + } + + SmallVector loopIteratorTypes = + linalgOp.getIteratorTypesArray(); + ShardingArray meshAxisAssignmentForLoopIterators = + getMeshAxisAssignmentForLoopIterators(operandShardings, resultShardings, + loopIteratorTypes, indexingMaps); + if (mesh::isAtLeastOneReductionIteratorSharded( + loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + ImplicitLocOpBuilder implicitLocBuilder(op->getLoc(), builder); + spmdizeLinalgOpWithShardedReduction( + linalgOp, spmdizedOperands, operandShardings, resultShardings, + loopIteratorTypes, meshAxisAssignmentForLoopIterators, spmdizationMap, + symbolTable, implicitLocBuilder); + } else { + spmdizeTriviallyShardableOperation(*op, spmdizedOperands, + operandShardings, resultShardings, + spmdizationMap, symbolTable, builder); + } + + return success(); + } +}; + +} // namespace + +template +static void registerOne(MLIRContext *ctx) { + OpType::template attachInterface>(*ctx); +} + +/// Variadic helper function. 
+template +static void registerAll(MLIRContext *ctx) { + (registerOne(ctx), ...); +} + +void registerMeshShardingInterfaceExternalModels(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) { + DialectRegistry registry; + registry.insert(); + ctx->appendDialectRegistry(registry); + for (StringRef name : registry.getDialectNames()) + ctx->getOrLoadDialect(name); + + registerOne(ctx); + registerAll< +#define GET_OP_LIST +#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" + >(ctx); + }); +} + +} // namespace mlir::linalg diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index 8b3119f02e8fda..bd870d4f982e5d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -275,14 +275,6 @@ struct LinalgOpPartialReductionInterface ArrayRef oldShape = linalgOp.getShape(linalgOp.getDpsInitOperand(0)); - // Extend tile size vector to the rank of the output tensor. - SmallVector tileSizeVector = - getValueOrCreateConstantIndexOp(b, loc, sizes); - if (tileSizeVector.size() < oldShape.size()) { - auto zero = b.create(loc, 0); - tileSizeVector.append(oldShape.size() - tileSizeVector.size(), zero); - } - // Calculate the new shape, we insert the new dimensions based on the index // of the reduction dimensions. 
SmallVector newOutputShape; diff --git a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp index 50163880e85f96..03f11ad1f94965 100644 --- a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp +++ b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp @@ -647,6 +647,13 @@ void AllReduceOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void AllReduceOp::build(OpBuilder &odsBuilder, OperationState &odsState, + Value input, StringRef mesh, + ArrayRef meshAxes, ReductionKind reduction) { + build(odsBuilder, odsState, input.getType(), mesh, meshAxes, input, + reduction); +} + void AllReduceOp::getAsmResultNames( function_ref setNameFn) { setNameFn(getResult(), "all_reduce"); diff --git a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp index fe3d7c44413fef..9acee5aa8d8604 100644 --- a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp +++ b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp @@ -539,8 +539,9 @@ static bool areValuesCompatibleWithFullReplicationShardings( if (std::size(values) != std::size(shardings)) { return false; } - return llvm::all_of(llvm::zip(std::forward(values), - std::forward(shardings)), + return llvm::all_of(llvm::zip_equal( + std::forward(values), + std::forward(shardings)), [](auto valueAndSharding) { return isValueCompatibleWithFullReplicationSharding( std::get<0>(valueAndSharding), @@ -563,6 +564,88 @@ void mesh::spmdizeFullyReplicatedOperation( builder.clone(op, spmdizationMap); } +static void updateMeshAxisAssignmentForLoopIterators( + ArrayRef meshAxesAssignmentForTensorAxis, AffineExpr indexingExpr, + SmallVector>> + &meshAxesAssignmentForLoopIterators) { + AffineDimExpr affineDimExpr = cast(indexingExpr); + unsigned loopIteratorIdx = affineDimExpr.getPosition(); + if (meshAxesAssignmentForLoopIterators[loopIteratorIdx]) { + assert(llvm::equal(meshAxesAssignmentForTensorAxis, + *meshAxesAssignmentForLoopIterators[loopIteratorIdx])); 
+ } else { + meshAxesAssignmentForLoopIterators[loopIteratorIdx] = + llvm::to_vector(meshAxesAssignmentForTensorAxis); + } +} + +ShardingArray mesh::getMeshAxisAssignmentForLoopIterators( + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef indexingMaps) { + SmallVector>> + meshAxisAssignmentForLoopIterators(loopIteratorTypes.size()); + SmallVector operatorAndResultShardings; + operatorAndResultShardings.reserve(operandShardings.size() + + resultShardings.size()); + llvm::append_range(operatorAndResultShardings, operandShardings); + for (auto [sharding, affineMap] : + llvm::zip_equal(operatorAndResultShardings, indexingMaps)) { + if (!sharding) { + continue; + } + for (auto [meshAxesAssignmentForTensorAxis, indexingExpr] : + llvm::zip(sharding.getSplitAxes(), affineMap.getResults())) { + updateMeshAxisAssignmentForLoopIterators( + meshAxesAssignmentForTensorAxis.asArrayRef(), indexingExpr, + meshAxisAssignmentForLoopIterators); + } + // Missing trailing split axes means replication on those tensor dimensions. 
+ for (unsigned i = sharding.getSplitAxes().size(); + i < affineMap.getNumResults(); ++i) { + updateMeshAxisAssignmentForLoopIterators( + {}, affineMap.getResults()[i], meshAxisAssignmentForLoopIterators); + } + } + + ShardingArray res; + llvm::transform(meshAxisAssignmentForLoopIterators, std::back_inserter(res), + [](std::optional> &axes) { + if (!axes) { + return SmallVector(); + }; + return std::move(*axes); + }); + return res; +} + +bool mesh::isAtLeastOneReductionIteratorSharded( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators) { + for (auto [loopIteratorType, meshAxisAssignment] : + llvm::zip_equal(loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + if (loopIteratorType == utils::IteratorType::reduction && + !meshAxisAssignment.empty()) { + return true; + } + } + return false; +} + +SmallVector mesh::getReductionMeshAxes( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators) { + SmallVector meshAxes; + for (auto [loopIteratorType, meshAxisAssignment] : + llvm::zip_equal(loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + if (loopIteratorType == utils::IteratorType::reduction) { + llvm::append_range(meshAxes, meshAxisAssignment); + } + } + return meshAxes; +} + void mesh::spmdizeTriviallyShardableOperation( Operation &op, ArrayRef spmdizedOperands, ArrayRef operandShardings, @@ -572,7 +655,7 @@ void mesh::spmdizeTriviallyShardableOperation( Operation *newOp = builder.clone(op, spmdizationMap); // Set the result types to the sharded counterparts. 
for (auto [oldResult, newResult, sharding] : - llvm::zip(op.getResults(), newOp->getResults(), resultShardings)) { + llvm::zip_equal(op.getResults(), newOp->getResults(), resultShardings)) { newResult.setType(shardType(newResult.getType(), getMesh(&op, sharding.getMesh(), symbolTable), sharding)); diff --git a/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp b/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp index d59b9119dea541..cb13ee404751ca 100644 --- a/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp @@ -208,4 +208,17 @@ createCollectiveProcessGroupSize(MeshOp mesh, ArrayRef axes, .cast>(); } +TypedValue createProcessLinearIndex(StringRef mesh, + ArrayRef meshAxes, + ImplicitLocOpBuilder &builder) { + ResultRange processInGroupMultiIndex = + builder.create(mesh, meshAxes).getResults(); + Operation::result_range processGroupShape = + builder.create(mesh, meshAxes).getResult(); + OpFoldResult processInGroupLinearIndex = affine::linearizeIndex( + llvm::to_vector_of(processInGroupMultiIndex), + llvm::to_vector_of(processGroupShape), builder); + return cast>(processInGroupLinearIndex.get()); +} + } // namespace mlir::mesh diff --git a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir new file mode 100644 index 00000000000000..6d21def8de2753 --- /dev/null +++ b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir @@ -0,0 +1,165 @@ +// RUN: mlir-opt \ +// RUN: --mesh-spmdization \ +// RUN: --test-constant-fold \ +// RUN: --split-input-file \ +// RUN: %s | FileCheck %s + +// CHECK: #[[$MAP_IDENTITY_1D:.*]] = affine_map<(d0) -> (d0)> +#map_identity_1d = affine_map<(d0) -> (d0)> + +mesh.mesh @mesh_1d(shape = 2) + +// CHECK-LABEL: func @elementwise_static_1d_mesh_static_1d_tensor +func.func @elementwise_static_1d_mesh_static_1d_tensor( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<1xi8>, + %in1: tensor<2xi8>, + // CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<1xi8>, + %in2: 
tensor<2xi8>, + // CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<1xi8> + %dps_out: tensor<2xi8> +// CHECK-SAME: -> tensor<1xi8> { +) -> tensor<2xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[0]]> : tensor<2xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : tensor<2xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[0]]> : tensor<2xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + // CHECK: %[[RES:.*]] = linalg.generic { + // CHECK-SAME: indexing_maps = [#[[$MAP_IDENTITY_1D]], #[[$MAP_IDENTITY_1D]], #[[$MAP_IDENTITY_1D]]], + // CHECK-SAME: iterator_types = ["parallel"]} + // CHECK-SAME: ins(%[[IN1]], %[[IN2]] : tensor<1xi8>, tensor<1xi8>) + // CHECK-SAME: outs(%[[DPS_OUT]] : tensor<1xi8>) { + %res = linalg.generic { + indexing_maps = [#map_identity_1d, #map_identity_1d, #map_identity_1d], + iterator_types = ["parallel"] + } ins(%in1_shared2, %in2_shared2 : tensor<2xi8>, tensor<2xi8>) + outs(%dps_out_shared2 : tensor<2xi8>) { + ^bb0(%in1_scalar: i8, %in2_scalar: i8, %out: i8): + %res_scalar = arith.muli %in1_scalar, %in2_scalar : i8 + linalg.yield %res_scalar : i8 + } -> tensor<2xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[0]]> : tensor<2xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + // CHECK: return %[[RES]] : tensor<1xi8> + return %res_shared2 : tensor<2xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 4) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_parallel_iterator_sharding +func.func @matmul_1d_mesh_static_tensors_parallel_iterator_sharding( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<1x3xi8>, + %in1: tensor<4x3xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<3x8xi8>, + %in2: tensor<3x8xi8>, +// CHECK-SAME: 
%[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<1x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<1x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[0]]> : tensor<4x3xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x3xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[]]> : tensor<3x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<3x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[0]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x8xi8> + // CHECK: %[[RES:.*]] = linalg.matmul + // CHECK-SAME: ins(%[[IN1]], %[[IN2]] : tensor<1x3xi8>, tensor<3x8xi8>) + // CHECK-SAME: outs(%[[DPS_OUT]] : tensor<1x8xi8>) + // CHECK-SAME: -> tensor<1x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x3xi8>, tensor<3x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[0]]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[RES]] : tensor<1x8xi8> + return %res_shared2 : tensor<4x8xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 3) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding +func.func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<4x2xi8>, + %in1: tensor<4x6xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<2x8xi8>, + %in2: tensor<6x8xi8>, +// CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<4x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<4x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[], [0]]> : tensor<4x6xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[], [0]]> annotate_for_users: tensor<4x6xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : 
tensor<6x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<6x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C0_I8:.*]] = arith.constant 0 : i8 + // CHECK-DAG: %[[PROCESS_IDX:.*]] = mesh.process_multi_index on @mesh_1d axes = [0] : index + // CHECK-DAG: %[[MESH_SIZE:.*]] = mesh.mesh_shape @mesh_1d axes = [0] : index + // CHECK: %[[DPS_INIT_OPERAND_CONDITION:.*]] = arith.cmpi eq, %[[PROCESS_IDX]], %[[C0]] : index + // CHECK: %[[DPS_INIT_OPERAND:.*]] = scf.if %[[DPS_INIT_OPERAND_CONDITION]] -> (tensor<4x8xi8>) { + // CHECK: scf.yield %[[DPS_OUT]] : tensor<4x8xi8> + // CHECK: } else { + // CHECK-DAG: %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<4x8xi8> + // CHECK: %[[NEUTRAL_ELEMENT_FILLED_TENSOR:.*]] = linalg.fill ins(%[[C0_I8]] : i8) + // CHECK-SAME: outs(%[[EMPTY_TENSOR]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: scf.yield %[[NEUTRAL_ELEMENT_FILLED_TENSOR]] : tensor<4x8xi8> + // CHECK: } + // CHECK: %[[SHARDED_MATMUL:.*]] = linalg.matmul ins(%[[IN1]], %[[IN2]] : tensor<4x2xi8>, tensor<2x8xi8>) + // CHECK-SAME: outs(%[[DPS_INIT_OPERAND]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: %[[ALL_REDUCED:.*]] = mesh.all_reduce %[[SHARDED_MATMUL]] on @mesh_1d mesh_axes = [0] : tensor<4x8xi8> -> tensor<4x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x6xi8>, tensor<6x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[]]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[ALL_REDUCED]] : tensor<4x8xi8> + return %res_shared2 : tensor<4x8xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 3) + +// CHECK-LABEL: func 
@matmul_1d_mesh_static_tensors_reduction_iterator_sharding_with_partial_result +func.func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding_with_partial_result( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<4x2xi8>, + %in1: tensor<4x6xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<2x8xi8>, + %in2: tensor<6x8xi8>, +// CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<4x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<4x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[], [0]]> : tensor<4x6xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[], [0]]> annotate_for_users: tensor<4x6xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : tensor<6x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<6x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C0_I8:.*]] = arith.constant 0 : i8 + // CHECK-DAG: %[[PROCESS_IDX:.*]] = mesh.process_multi_index on @mesh_1d axes = [0] : index + // CHECK-DAG: %[[MESH_SIZE:.*]] = mesh.mesh_shape @mesh_1d axes = [0] : index + // CHECK: %[[DPS_INIT_OPERAND_CONDITION:.*]] = arith.cmpi eq, %[[PROCESS_IDX]], %[[C0]] : index + // CHECK: %[[DPS_INIT_OPERAND:.*]] = scf.if %[[DPS_INIT_OPERAND_CONDITION]] -> (tensor<4x8xi8>) { + // CHECK: scf.yield %[[DPS_OUT]] : tensor<4x8xi8> + // CHECK: } else { + // CHECK-DAG: %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<4x8xi8> + // CHECK: %[[NEUTRAL_ELEMENT_FILLED_TENSOR:.*]] = linalg.fill ins(%[[C0_I8]] : i8) + // CHECK-SAME: outs(%[[EMPTY_TENSOR]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: scf.yield %[[NEUTRAL_ELEMENT_FILLED_TENSOR]] : tensor<4x8xi8> + // CHECK: } + // CHECK: %[[SHARDED_MATMUL:.*]] = linalg.matmul ins(%[[IN1]], %[[IN2]] : tensor<4x2xi8>, tensor<2x8xi8>) + // CHECK-SAME: 
outs(%[[DPS_INIT_OPERAND]] : tensor<4x8xi8>) -> tensor<4x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x6xi8>, tensor<6x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[]], partial = sum[0]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[]], partial = sum[0]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[SHARDED_MATMUL]] : tensor<4x8xi8> + return %res_shared2 : tensor<4x8xi8> +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 7a6bc2dc320255..2cfe6184470330 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10841,6 +10841,7 @@ cc_library( ":MemRefDialect", ":Parser", ":SCFDialect", + ":MeshShardingInterface", ":SideEffectInterfaces", ":SparseTensorDialect", ":Support", @@ -10994,10 +10995,13 @@ cc_library( ":MathDialect", ":MemRefDialect", ":MemRefTransforms", + ":MeshDialect", + ":MeshTransforms", ":Pass", ":SCFDialect", ":SCFTransforms", ":SCFUtils", + ":MeshShardingInterface", ":SparseTensorDialect", ":SubsetOpInterface", ":Support", From ddaf040ea924b1bdd4e093f583018c262da3cc7f Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 8 Mar 2024 10:06:24 +0900 Subject: [PATCH 137/158] [mlir][Transforms][NFC] Make signature conversion more efficient (#83922) During block signature conversion, a new block is inserted and ops are moved from the old block to the new block. This commit changes the implementation such that ops are moved in bulk (`splice`) instead of one-by-one; that's what `splitBlock` is doing. This also makes it possible to pass the new block argument types directly to `createBlock` instead of using `addArgument` (which bypasses the rewriter). 
This doesn't change anything from a technical point of view (there is no rewriter API for adding arguments at the moment), but the implementation reads a bit nicer. --- .../Transforms/Utils/DialectConversion.cpp | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index d7dc902a9a5ebd..8b2d71408a5651 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1281,7 +1281,7 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( ConversionPatternRewriter &rewriter, Block *block, const TypeConverter *converter, TypeConverter::SignatureConversion &signatureConversion) { - MLIRContext *ctx = rewriter.getContext(); + OpBuilder::InsertionGuard g(rewriter); // If no arguments are being changed or added, there is nothing to do. unsigned origArgCount = block->getNumArguments(); @@ -1289,14 +1289,9 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( if (llvm::equal(block->getArgumentTypes(), convertedTypes)) return block; - // Split the block at the beginning to get a new block to use for the updated - // signature. - Block *newBlock = rewriter.splitBlock(block, block->begin()); - block->replaceAllUsesWith(newBlock); - - // Map all new arguments to the location of the argument they originate from. + // Compute the locations of all block arguments in the new block. 
SmallVector newLocs(convertedTypes.size(), - Builder(ctx).getUnknownLoc()); + rewriter.getUnknownLoc()); for (unsigned i = 0; i < origArgCount; ++i) { auto inputMap = signatureConversion.getInputMapping(i); if (!inputMap || inputMap->replacementValue) @@ -1306,9 +1301,16 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( newLocs[inputMap->inputNo + j] = origLoc; } - SmallVector newArgRange( - newBlock->addArguments(convertedTypes, newLocs)); - ArrayRef newArgs(newArgRange); + // Insert a new block with the converted block argument types and move all ops + // from the old block to the new block. + Block *newBlock = + rewriter.createBlock(block->getParent(), std::next(block->getIterator()), + convertedTypes, newLocs); + appendRewrite(newBlock, block, newBlock->end()); + newBlock->getOperations().splice(newBlock->end(), block->getOperations()); + + // Replace all uses of the old block with the new block. + block->replaceAllUsesWith(newBlock); // Remap each of the original arguments as determined by the signature // conversion. @@ -1333,7 +1335,8 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( } // Otherwise, this is a 1->1+ mapping. 
- auto replArgs = newArgs.slice(inputMap->inputNo, inputMap->size); + auto replArgs = + newBlock->getArguments().slice(inputMap->inputNo, inputMap->size); Value newArg; // If this is a 1->1 mapping and the types of new and replacement arguments From e7a22e72de79352c4639664f1ac678555a4c20e4 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Thu, 7 Mar 2024 20:19:26 -0500 Subject: [PATCH 138/158] [PPC] precommit cases for issue 74915 --- llvm/test/CodeGen/PowerPC/pr74951.ll | 54 ++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pr74951.ll diff --git a/llvm/test/CodeGen/PowerPC/pr74951.ll b/llvm/test/CodeGen/PowerPC/pr74951.ll new file mode 100644 index 00000000000000..a0d19fc09cc276 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr74951.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -verify-machineinstrs -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s + +%struct.anon = type { i32 } + +@b = local_unnamed_addr global %struct.anon { i32 -1 }, align 4 +@g = local_unnamed_addr global [1 x i1] zeroinitializer, align 1 + +define noundef signext i32 @main() { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld r3, L..C0(r2) # @b +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: extsw r4, r3 +; CHECK-NEXT: neg r4, r4 +; CHECK-NEXT: andi. 
r5, r3, 65535 +; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: bne cr0, L..BB0_4 +; CHECK-NEXT: # %bb.1: # %lor.rhs.i.i +; CHECK-NEXT: xori r5, r4, 1 +; CHECK-NEXT: cmpw r3, r5 +; CHECK-NEXT: crnot 4*cr5+lt, eq +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 +; CHECK-NEXT: # %bb.2: # %lor.rhs.i.i +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: L..BB0_3: # %lor.rhs.i.i +; CHECK-NEXT: ld r5, L..C1(r2) # @g +; CHECK-NEXT: stb r3, 0(r5) +; CHECK-NEXT: L..BB0_4: # %g.exit +; CHECK-NEXT: ld r5, L..C1(r2) # @g +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: stb r4, 0(r5) +; CHECK-NEXT: blr +entry: + %0 = load i32, ptr @b, align 4 + %conv4.i = sext i32 %0 to i64 + %cmp.i = icmp slt i32 %0, 1 + %conv.i = zext i1 %cmp.i to i32 + %cmp1.i = icmp ne i32 %0, %conv.i + %conv3.i = trunc i32 %0 to i16 + %tobool.not.i.i = icmp eq i16 %conv3.i, 0 + br i1 %tobool.not.i.i, label %lor.rhs.i.i, label %g.exit + +lor.rhs.i.i: ; preds = %entry + store i1 %cmp1.i, ptr @g, align 1 + br label %g.exit + +g.exit: ; preds = %lor.end.i.i + %4 = trunc i64 %conv4.i to i32 + %cmp.i9.i = icmp sgt i32 %4, 0 + store i1 %cmp.i9.i, ptr @g, align 1 + ret i32 0 +} From 60a20bd6973c8fc7aa9a19465ed042604e07fb17 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 8 Mar 2024 10:34:45 +0900 Subject: [PATCH 139/158] [mlir][Transforms] Add listener support to dialect conversion (#83425) This commit adds listener support to the dialect conversion. Similarly to the greedy pattern rewrite driver, an optional listener can be specified in the configuration object. Listeners are notified only if the dialect conversion succeeds. In case of a failure, where some IR changes are first performed and then rolled back, no notifications are sent. Due to the fact that some kinds of rewrite are reflected in the IR immediately and some in a delayed fashion, there are certain limitations when attaching a listener; these are documented in `ConversionConfig`. 
To summarize, users are always notified about all rewrites that happened, but the notifications are sent all at once at the very end, and not interleaved with the actual IR changes. This change is in preparation improvements to `transform.apply_conversion_patterns`, which currently invalidates all handles. In the future, it can use a listener to update handles accordingly, similar to `transform.apply_patterns`. --- .../mlir/Transforms/DialectConversion.h | 33 +++ .../Transforms/Utils/DialectConversion.cpp | 225 ++++++++++++++---- mlir/test/Transforms/test-legalizer.mlir | 71 +++++- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 28 ++- 4 files changed, 302 insertions(+), 55 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 01fde101ef3cb6..83198c9b0db545 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1085,6 +1085,39 @@ struct ConversionConfig { /// IR during an analysis conversion and only pre-existing operations are /// added to the set. DenseSet *legalizableOps = nullptr; + + /// An optional listener that is notified about all IR modifications in case + /// dialect conversion succeeds. If the dialect conversion fails and no IR + /// modifications are visible (i.e., they were all rolled back), no + /// notifications are sent. + /// + /// Note: Notifications are sent in a delayed fashion, when the dialect + /// conversion is guaranteed to succeed. At that point, some IR modifications + /// may already have been materialized. Consequently, operations/blocks that + /// are passed to listener callbacks should not be accessed. (Ops/blocks are + /// guaranteed to be valid pointers and accessing op names is allowed. But + /// there are no guarantees about the state of ops/blocks at the time that a + /// callback is triggered.) 
+ /// + /// Example: Consider a dialect conversion a new op ("test.foo") is created + /// and inserted, and later moved to another block. (Moving ops also triggers + /// "notifyOperationInserted".) + /// + /// (1) notifyOperationInserted: "test.foo" (into block "b1") + /// (2) notifyOperationInserted: "test.foo" (moved to another block "b2") + /// + /// When querying "op->getBlock()" during the first "notifyOperationInserted", + /// "b2" would be returned because "moving an op" is a kind of rewrite that is + /// immediately performed by the dialect conversion (and rolled back upon + /// failure). + // + // Note: When receiving a "notifyBlockInserted"/"notifyOperationInserted" + // callback, the previous region/block is provided to the callback, but not + // the iterator pointing to the exact location within the region/block. That + // is because these notifications are sent with a delay (after the IR has + // already been modified) and iterators into past IR state cannot be + // represented at the moment. + RewriterBase::Listener *listener = nullptr; }; //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 8b2d71408a5651..c1a261eab8487d 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -204,14 +204,22 @@ class IRRewrite { /// Roll back the rewrite. Operations may be erased during rollback. virtual void rollback() = 0; - /// Commit the rewrite. Operations/blocks may be unlinked during the commit - /// phase, but they must not be erased yet. This is because internal dialect - /// conversion state (such as `mapping`) may still be using them. Operations/ - /// blocks must be erased during cleanup. - virtual void commit() {} + /// Commit the rewrite. At this point, it is certain that the dialect + /// conversion will succeed. 
All IR modifications, except for operation/block + /// erasure, must be performed through the given rewriter. + /// + /// Instead of erasing operations/blocks, they should merely be unlinked + /// commit phase and finally be erased during the cleanup phase. This is + /// because internal dialect conversion state (such as `mapping`) may still + /// be using them. + /// + /// Any IR modification that was already performed before the commit phase + /// (e.g., insertion of an op) must be communicated to the listener that may + /// be attached to the given rewriter. + virtual void commit(RewriterBase &rewriter) {} /// Cleanup operations/blocks. Cleanup is called after commit. - virtual void cleanup() {} + virtual void cleanup(RewriterBase &rewriter) {} Kind getKind() const { return kind; } @@ -221,12 +229,6 @@ class IRRewrite { IRRewrite(Kind kind, ConversionPatternRewriterImpl &rewriterImpl) : kind(kind), rewriterImpl(rewriterImpl) {} - /// Erase the given op (unless it was already erased). - void eraseOp(Operation *op); - - /// Erase the given block (unless it was already erased). - void eraseBlock(Block *block); - const ConversionConfig &getConfig() const; const Kind kind; @@ -265,6 +267,12 @@ class CreateBlockRewrite : public BlockRewrite { return rewrite->getKind() == Kind::CreateBlock; } + void commit(RewriterBase &rewriter) override { + // The block was already created and inserted. Just inform the listener. + if (auto *listener = rewriter.getListener()) + listener->notifyBlockInserted(block, /*previous=*/{}, /*previousIt=*/{}); + } + void rollback() override { // Unlink all of the operations within this block, they will be deleted // separately. @@ -311,10 +319,19 @@ class EraseBlockRewrite : public BlockRewrite { block = nullptr; } - void cleanup() override { + void commit(RewriterBase &rewriter) override { // Erase the block. 
assert(block && "expected block"); assert(block->empty() && "expected empty block"); + + // Notify the listener that the block is about to be erased. + if (auto *listener = + dyn_cast_or_null(rewriter.getListener())) + listener->notifyBlockErased(block); + } + + void cleanup(RewriterBase &rewriter) override { + // Erase the block. block->dropAllDefinedValueUses(); delete block; block = nullptr; @@ -341,6 +358,13 @@ class InlineBlockRewrite : public BlockRewrite { firstInlinedInst(sourceBlock->empty() ? nullptr : &sourceBlock->front()), lastInlinedInst(sourceBlock->empty() ? nullptr : &sourceBlock->back()) { + // If a listener is attached to the dialect conversion, ops must be moved + // one-by-one. When they are moved in bulk, notifications cannot be sent + // because the ops that used to be in the source block at the time of the + // inlining (before the "commit" phase) are unknown at the time when + // notifications are sent (which is during the "commit" phase). + assert(!getConfig().listener && + "InlineBlockRewrite not supported if listener is attached"); } static bool classof(const IRRewrite *rewrite) { @@ -382,6 +406,16 @@ class MoveBlockRewrite : public BlockRewrite { return rewrite->getKind() == Kind::MoveBlock; } + void commit(RewriterBase &rewriter) override { + // The block was already moved. Just inform the listener. + if (auto *listener = rewriter.getListener()) { + // Note: `previousIt` cannot be passed because this is a delayed + // notification and iterators into past IR state cannot be represented. + listener->notifyBlockInserted(block, /*previous=*/region, + /*previousIt=*/{}); + } + } + void rollback() override { // Move the block back to its original position. 
Region::iterator before = @@ -437,7 +471,7 @@ class BlockTypeConversionRewrite : public BlockRewrite { LogicalResult materializeLiveConversions(function_ref findLiveUser); - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; @@ -466,7 +500,7 @@ class ReplaceBlockArgRewrite : public BlockRewrite { return rewrite->getKind() == Kind::ReplaceBlockArg; } - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; @@ -506,6 +540,17 @@ class MoveOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::MoveOperation; } + void commit(RewriterBase &rewriter) override { + // The operation was already moved. Just inform the listener. + if (auto *listener = rewriter.getListener()) { + // Note: `previousIt` cannot be passed because this is a delayed + // notification and iterators into past IR state cannot be represented. + listener->notifyOperationInserted( + op, /*previous=*/OpBuilder::InsertPoint(/*insertBlock=*/block, + /*insertPt=*/{})); + } + } + void rollback() override { // Move the operation back to its original position. Block::iterator before = @@ -549,7 +594,12 @@ class ModifyOperationRewrite : public OperationRewrite { "rewrite was neither committed nor rolled back"); } - void commit() override { + void commit(RewriterBase &rewriter) override { + // Notify the listener that the operation was modified in-place. 
+ if (auto *listener = + dyn_cast_or_null(rewriter.getListener())) + listener->notifyOperationModified(op); + if (propertiesStorage) { OpaqueProperties propCopy(propertiesStorage); // Note: The operation may have been erased in the mean time, so @@ -600,11 +650,11 @@ class ReplaceOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::ReplaceOperation; } - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; - void cleanup() override; + void cleanup(RewriterBase &rewriter) override; const TypeConverter *getConverter() const { return converter; } @@ -629,6 +679,12 @@ class CreateOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::CreateOperation; } + void commit(RewriterBase &rewriter) override { + // The operation was already created and inserted. Just inform the listener. + if (auto *listener = rewriter.getListener()) + listener->notifyOperationInserted(op, /*previous=*/{}); + } + void rollback() override; }; @@ -666,7 +722,7 @@ class UnresolvedMaterializationRewrite : public OperationRewrite { void rollback() override; - void cleanup() override; + void cleanup(RewriterBase &rewriter) override; /// Return the type converter of this materialization (which may be null). const TypeConverter *getConverter() const { @@ -735,7 +791,7 @@ namespace detail { struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(MLIRContext *ctx, const ConversionConfig &config) - : eraseRewriter(ctx), config(config) {} + : context(ctx), config(config) {} //===--------------------------------------------------------------------===// // State Management @@ -900,6 +956,7 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { } void notifyOperationErased(Operation *op) override { erased.insert(op); } + void notifyBlockErased(Block *block) override { erased.insert(block); } /// Pointers to all erased operations and blocks. 
@@ -910,8 +967,8 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { // State //===--------------------------------------------------------------------===// - /// This rewriter must be used for erasing ops/blocks. - SingleEraseRewriter eraseRewriter; + /// MLIR context. + MLIRContext *context; // Mapping between replaced values that differ in type. This happens when // replacing a value with one of a different type. @@ -955,19 +1012,19 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { } // namespace detail } // namespace mlir -void IRRewrite::eraseOp(Operation *op) { - rewriterImpl.eraseRewriter.eraseOp(op); -} - -void IRRewrite::eraseBlock(Block *block) { - rewriterImpl.eraseRewriter.eraseBlock(block); -} - const ConversionConfig &IRRewrite::getConfig() const { return rewriterImpl.config; } -void BlockTypeConversionRewrite::commit() { +void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) { + // Inform the listener about all IR modifications that have already taken + // place: References to the original block have been replaced with the new + // block. + if (auto *listener = dyn_cast_or_null( + rewriter.getListener())) + for (Operation *op : block->getUsers()) + listener->notifyOperationModified(op); + // Process the remapping for each of the original arguments. for (auto [origArg, info] : llvm::zip_equal(origBlock->getArguments(), argInfo)) { @@ -975,7 +1032,7 @@ void BlockTypeConversionRewrite::commit() { if (!info) { if (Value newArg = rewriterImpl.mapping.lookupOrNull(origArg, origArg.getType())) - origArg.replaceAllUsesWith(newArg); + rewriter.replaceAllUsesWith(origArg, newArg); continue; } @@ -985,8 +1042,8 @@ void BlockTypeConversionRewrite::commit() { // If the argument is still used, replace it with the generated cast. 
if (!origArg.use_empty()) { - origArg.replaceAllUsesWith( - rewriterImpl.mapping.lookupOrDefault(castValue, origArg.getType())); + rewriter.replaceAllUsesWith(origArg, rewriterImpl.mapping.lookupOrDefault( + castValue, origArg.getType())); } } } @@ -1042,13 +1099,13 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions( return success(); } -void ReplaceBlockArgRewrite::commit() { +void ReplaceBlockArgRewrite::commit(RewriterBase &rewriter) { Value repl = rewriterImpl.mapping.lookupOrNull(arg, arg.getType()); if (!repl) return; if (isa(repl)) { - arg.replaceAllUsesWith(repl); + rewriter.replaceAllUsesWith(arg, repl); return; } @@ -1057,7 +1114,7 @@ void ReplaceBlockArgRewrite::commit() { // replacement value. Operation *replOp = cast(repl).getOwner(); Block *replBlock = replOp->getBlock(); - arg.replaceUsesWithIf(repl, [&](OpOperand &operand) { + rewriter.replaceUsesWithIf(arg, repl, [&](OpOperand &operand) { Operation *user = operand.getOwner(); return user->getBlock() != replBlock || replOp->isBeforeInBlock(user); }); @@ -1065,14 +1122,40 @@ void ReplaceBlockArgRewrite::commit() { void ReplaceBlockArgRewrite::rollback() { rewriterImpl.mapping.erase(arg); } -void ReplaceOperationRewrite::commit() { - for (OpResult result : op->getResults()) - if (Value newValue = - rewriterImpl.mapping.lookupOrNull(result, result.getType())) - result.replaceAllUsesWith(newValue); +void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { + auto *listener = dyn_cast_or_null( + rewriter.getListener()); + + // Compute replacement values. + SmallVector replacements = + llvm::map_to_vector(op->getResults(), [&](OpResult result) { + return rewriterImpl.mapping.lookupOrNull(result, result.getType()); + }); + + // Notify the listener that the operation is about to be replaced. + if (listener) + listener->notifyOperationReplaced(op, replacements); + + // Replace all uses with the new values. 
+ for (auto [result, newValue] : + llvm::zip_equal(op->getResults(), replacements)) + if (newValue) + rewriter.replaceAllUsesWith(result, newValue); + + // The original op will be erased, so remove it from the set of unlegalized + // ops. if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); + + // Notify the listener that the operation (and its nested operations) was + // erased. + if (listener) { + op->walk( + [&](Operation *op) { listener->notifyOperationErased(op); }); + } + // Do not erase the operation yet. It may still be referenced in `mapping`. + // Just unlink it for now and erase it during cleanup. op->getBlock()->getOperations().remove(op); } @@ -1081,7 +1164,9 @@ void ReplaceOperationRewrite::rollback() { rewriterImpl.mapping.erase(result); } -void ReplaceOperationRewrite::cleanup() { eraseOp(op); } +void ReplaceOperationRewrite::cleanup(RewriterBase &rewriter) { + rewriter.eraseOp(op); +} void CreateOperationRewrite::rollback() { for (Region ®ion : op->getRegions()) { @@ -1100,14 +1185,20 @@ void UnresolvedMaterializationRewrite::rollback() { op->erase(); } -void UnresolvedMaterializationRewrite::cleanup() { eraseOp(op); } +void UnresolvedMaterializationRewrite::cleanup(RewriterBase &rewriter) { + rewriter.eraseOp(op); +} void ConversionPatternRewriterImpl::applyRewrites() { // Commit all rewrites. + IRRewriter rewriter(context, config.listener); for (auto &rewrite : rewrites) - rewrite->commit(); + rewrite->commit(rewriter); + + // Clean up all rewrites. 
+ SingleEraseRewriter eraseRewriter(context); for (auto &rewrite : rewrites) - rewrite->cleanup(); + rewrite->cleanup(eraseRewriter); } //===----------------------------------------------------------------------===// @@ -1306,8 +1397,21 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( Block *newBlock = rewriter.createBlock(block->getParent(), std::next(block->getIterator()), convertedTypes, newLocs); - appendRewrite(newBlock, block, newBlock->end()); - newBlock->getOperations().splice(newBlock->end(), block->getOperations()); + + // If a listener is attached to the dialect conversion, ops cannot be moved + // to the destination block in bulk ("fast path"). This is because at the time + // the notifications are sent, it is unknown which ops were moved. Instead, + // ops should be moved one-by-one ("slow path"), so that a separate + // `MoveOperationRewrite` is enqueued for each moved op. Moving ops in bulk is + // a bit more efficient, so we try to do that when possible. + bool fastPath = !config.listener; + if (fastPath) { + appendRewrite(newBlock, block, newBlock->end()); + newBlock->getOperations().splice(newBlock->end(), block->getOperations()); + } else { + while (!block->empty()) + rewriter.moveOpBefore(&block->front(), newBlock, newBlock->end()); + } // Replace all uses of the old block with the new block. block->replaceAllUsesWith(newBlock); @@ -1645,10 +1749,31 @@ void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest, "expected 'source' to have no predecessors"); #endif // NDEBUG - impl->notifyBlockBeingInlined(dest, source, before); + // If a listener is attached to the dialect conversion, ops cannot be moved + // to the destination block in bulk ("fast path"). This is because at the time + // the notifications are sent, it is unknown which ops were moved. Instead, + // ops should be moved one-by-one ("slow path"), so that a separate + // `MoveOperationRewrite` is enqueued for each moved op. 
Moving ops in bulk is + // a bit more efficient, so we try to do that when possible. + bool fastPath = !impl->config.listener; + + if (fastPath) + impl->notifyBlockBeingInlined(dest, source, before); + + // Replace all uses of block arguments. for (auto it : llvm::zip(source->getArguments(), argValues)) replaceUsesOfBlockArgument(std::get<0>(it), std::get<1>(it)); - dest->getOperations().splice(before, source->getOperations()); + + if (fastPath) { + // Move all ops at once. + dest->getOperations().splice(before, source->getOperations()); + } else { + // Move op by op. + while (!source->empty()) + moveOpBefore(&source->front(), dest, before); + } + + // Erase the source block. eraseBlock(source); } diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index ccdc9fe78ea0d3..d552f0346644b3 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -1,5 +1,10 @@ // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns -verify-diagnostics %s | FileCheck %s +// CHECK: notifyOperationInserted: test.legal_op_a, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_a +// CHECK-NEXT: notifyOperationModified: func.return +// CHECK-NEXT: notifyOperationErased: test.illegal_op_a + // CHECK-LABEL: verifyDirectPattern func.func @verifyDirectPattern() -> i32 { // CHECK-NEXT: "test.legal_op_a"() <{status = "Success"} @@ -8,6 +13,16 @@ func.func @verifyDirectPattern() -> i32 { return %result : i32 } +// ----- + +// CHECK: notifyOperationInserted: test.illegal_op_e, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_c +// CHECK-NEXT: notifyOperationModified: func.return +// CHECK-NEXT: notifyOperationErased: test.illegal_op_c +// CHECK-NEXT: notifyOperationInserted: test.legal_op_a, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_e +// CHECK-NEXT: notifyOperationErased: test.illegal_op_e + // CHECK-LABEL: 
verifyLargerBenefit func.func @verifyLargerBenefit() -> i32 { // CHECK-NEXT: "test.legal_op_a"() <{status = "Success"} @@ -16,16 +31,24 @@ func.func @verifyLargerBenefit() -> i32 { return %result : i32 } +// ----- + +// CHECK: notifyOperationModified: func.func +// Note: No block insertion because this function is external and no block +// signature conversion is performed. + // CHECK-LABEL: func private @remap_input_1_to_0() func.func private @remap_input_1_to_0(i16) +// ----- + // CHECK-LABEL: func @remap_input_1_to_1(%arg0: f64) func.func @remap_input_1_to_1(%arg0: i64) { // CHECK-NEXT: "test.valid"{{.*}} : (f64) "test.invalid"(%arg0) : (i64) -> () } -// CHECK-LABEL: func @remap_call_1_to_1(%arg0: f64) +// CHECK: func @remap_call_1_to_1(%arg0: f64) func.func @remap_call_1_to_1(%arg0: i64) { // CHECK-NEXT: call @remap_input_1_to_1(%arg0) : (f64) -> () call @remap_input_1_to_1(%arg0) : (i64) -> () @@ -33,12 +56,36 @@ func.func @remap_call_1_to_1(%arg0: i64) { return } +// ----- + +// Block signature conversion: new block is inserted. +// CHECK: notifyBlockInserted into func.func: was unlinked + +// Contents of the old block are moved to the new block. +// CHECK-NEXT: notifyOperationInserted: test.return, was linked, exact position unknown + +// The new block arguments are used in "test.return". +// CHECK-NEXT: notifyOperationModified: test.return + +// The old block is erased. +// CHECK-NEXT: notifyBlockErased + +// The function op gets a new type attribute. +// CHECK-NEXT: notifyOperationModified: func.func + +// "test.return" is replaced. 
+// CHECK-NEXT: notifyOperationInserted: test.return, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.return +// CHECK-NEXT: notifyOperationErased: test.return + // CHECK-LABEL: func @remap_input_1_to_N({{.*}}f16, {{.*}}f16) func.func @remap_input_1_to_N(%arg0: f32) -> f32 { // CHECK-NEXT: "test.return"{{.*}} : (f16, f16) -> () "test.return"(%arg0) : (f32) -> () } +// ----- + // CHECK-LABEL: func @remap_input_1_to_N_remaining_use(%arg0: f16, %arg1: f16) func.func @remap_input_1_to_N_remaining_use(%arg0: f32) { // CHECK-NEXT: [[CAST:%.*]] = "test.cast"(%arg0, %arg1) : (f16, f16) -> f32 @@ -54,6 +101,8 @@ func.func @remap_materialize_1_to_1(%arg0: i42) { "test.return"(%arg0) : (i42) -> () } +// ----- + // CHECK-LABEL: func @remap_input_to_self func.func @remap_input_to_self(%arg0: index) { // CHECK-NOT: test.cast @@ -68,6 +117,8 @@ func.func @remap_multi(%arg0: i64, %unused: i16, %arg1: i64) -> (i64, i64) { "test.invalid"(%arg0, %arg1) : (i64, i64) -> () } +// ----- + // CHECK-LABEL: func @no_remap_nested func.func @no_remap_nested() { // CHECK-NEXT: "foo.region" @@ -82,6 +133,8 @@ func.func @no_remap_nested() { return } +// ----- + // CHECK-LABEL: func @remap_moved_region_args func.func @remap_moved_region_args() { // CHECK-NEXT: return @@ -96,6 +149,8 @@ func.func @remap_moved_region_args() { return } +// ----- + // CHECK-LABEL: func @remap_cloned_region_args func.func @remap_cloned_region_args() { // CHECK-NEXT: return @@ -122,6 +177,8 @@ func.func @remap_drop_region() { return } +// ----- + // CHECK-LABEL: func @dropped_input_in_use func.func @dropped_input_in_use(%arg: i16, %arg2: i64) { // CHECK-NEXT: "test.cast"{{.*}} : () -> i16 @@ -130,6 +187,8 @@ func.func @dropped_input_in_use(%arg: i16, %arg2: i64) { "work"(%arg) : (i16) -> () } +// ----- + // CHECK-LABEL: func @up_to_date_replacement func.func @up_to_date_replacement(%arg: i8) -> i8 { // CHECK-NEXT: return @@ -139,6 +198,8 @@ func.func @up_to_date_replacement(%arg: i8) -> i8 { return %repl_2 : 
i8 } +// ----- + // CHECK-LABEL: func @remove_foldable_op // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: i32) func.func @remove_foldable_op(%arg0 : i32) -> (i32) { @@ -150,6 +211,8 @@ func.func @remove_foldable_op(%arg0 : i32) -> (i32) { return %0 : i32 } +// ----- + // CHECK-LABEL: @create_block func.func @create_block() { // Check that we created a block with arguments. @@ -161,6 +224,12 @@ func.func @create_block() { return } +// ----- + +// CHECK: notifyOperationModified: test.recursive_rewrite +// CHECK-NEXT: notifyOperationModified: test.recursive_rewrite +// CHECK-NEXT: notifyOperationModified: test.recursive_rewrite + // CHECK-LABEL: @bounded_recursion func.func @bounded_recursion() { // CHECK: test.recursive_rewrite 0 diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 27eae2ffd694b5..2da184bc3d85ba 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -327,8 +327,12 @@ struct TestPatternDriver struct DumpNotifications : public RewriterBase::Listener { void notifyBlockInserted(Block *block, Region *previous, Region::iterator previousIt) override { - llvm::outs() << "notifyBlockInserted into " - << block->getParentOp()->getName() << ": "; + llvm::outs() << "notifyBlockInserted"; + if (block->getParentOp()) { + llvm::outs() << " into " << block->getParentOp()->getName() << ": "; + } else { + llvm::outs() << " into unknown op: "; + } if (previous == nullptr) { llvm::outs() << "was unlinked\n"; } else { @@ -341,7 +345,9 @@ struct DumpNotifications : public RewriterBase::Listener { if (!previous.isSet()) { llvm::outs() << ", was unlinked\n"; } else { - if (previous.getPoint() == previous.getBlock()->end()) { + if (!previous.getPoint().getNodePtr()) { + llvm::outs() << ", was linked, exact position unknown\n"; + } else if (previous.getPoint() == previous.getBlock()->end()) { llvm::outs() << ", was last in block\n"; } else { llvm::outs() << ", previous = " << 
previous.getPoint()->getName() @@ -349,9 +355,18 @@ struct DumpNotifications : public RewriterBase::Listener { } } } + void notifyBlockErased(Block *block) override { + llvm::outs() << "notifyBlockErased\n"; + } void notifyOperationErased(Operation *op) override { llvm::outs() << "notifyOperationErased: " << op->getName() << "\n"; } + void notifyOperationModified(Operation *op) override { + llvm::outs() << "notifyOperationModified: " << op->getName() << "\n"; + } + void notifyOperationReplaced(Operation *op, ValueRange values) override { + llvm::outs() << "notifyOperationReplaced: " << op->getName() << "\n"; + } }; struct TestStrictPatternDriver @@ -1153,6 +1168,8 @@ struct TestLegalizePatternDriver if (mode == ConversionMode::Partial) { DenseSet unlegalizedOps; ConversionConfig config; + DumpNotifications dumpNotifications; + config.listener = &dumpNotifications; config.unlegalizedOps = &unlegalizedOps; if (failed(applyPartialConversion(getOperation(), target, std::move(patterns), config))) { @@ -1171,8 +1188,11 @@ struct TestLegalizePatternDriver return (bool)op->getAttrOfType("test.dynamically_legal"); }); + ConversionConfig config; + DumpNotifications dumpNotifications; + config.listener = &dumpNotifications; if (failed(applyFullConversion(getOperation(), target, - std::move(patterns)))) { + std::move(patterns), config))) { getOperation()->emitRemark() << "applyFullConversion failed"; } return; From 474a73d979bdab8782c17829d72386e0da39eb39 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Fri, 8 Mar 2024 09:38:29 +0800 Subject: [PATCH 140/158] [mlir] Fix build failure in MeshShardingInterfaceImpl.cpp (NFC) llvm-project/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp:96:8: error: unused variable 'resultElementType' [-Werror,-Wunused-variable] Type resultElementType = ^ llvm-project/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp:122:1: error: non-void function does not return a value in all control paths [-Werror,-Wreturn-type] } ^ 2 
errors generated. --- .../Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp index 7ac45dc3eb3efc..146e880765668b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp @@ -93,7 +93,7 @@ static ReductionKind getReductionKindOfLinalgOp(LinalgOp op) { if (!reductionOp) { return ReductionKind::Generic; } - Type resultElementType = + [[maybe_unused]] Type resultElementType = llvm::cast(op->getResult(0).getType()).getElementType(); // TODO: handle case when result type of the reduction op does not match the // element type of the result tensor. @@ -119,6 +119,7 @@ static MeshOp getMesh(Operation *op, } assert(false); + return nullptr; } // Choose the operand based on the current process index along the reduction From da4957be2365831c94eab0b52612367c29f1d299 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 7 Mar 2024 17:28:37 -0800 Subject: [PATCH 141/158] [NFC] [hwasan] use for_each and move comment --- llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp index 2ffe89a2458405..bfe474d8204578 100644 --- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp +++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/Utils/MemoryTaggingSupport.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/StackSafetyAnalysis.h" @@ -69,14 +70,12 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, ++NumCoveredExits; } } - // If there's a mix of covered and 
non-covered exits, just put the untag - // on exits, so we avoid the redundancy of untagging twice. if (NumCoveredExits == ReachableRetVec.size()) { - for (auto *End : Ends) - Callback(End); + for_each(Ends, Callback); } else { - for (auto *RI : ReachableRetVec) - Callback(RI); + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + for_each(ReachableRetVec, Callback); // We may have inserted untag outside of the lifetime interval. // Signal the caller to remove the lifetime end call for this alloca. return false; From cc34e56b865f1fc9e894b75fc958f09dff0fcdea Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Thu, 7 Mar 2024 20:51:47 -0500 Subject: [PATCH 142/158] [PPC][NFC] add an option to expose the bug in 74951 --- llvm/test/CodeGen/PowerPC/pr74951.ll | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/pr74951.ll b/llvm/test/CodeGen/PowerPC/pr74951.ll index a0d19fc09cc276..c1b2e3ee0dd68b 100644 --- a/llvm/test/CodeGen/PowerPC/pr74951.ll +++ b/llvm/test/CodeGen/PowerPC/pr74951.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc < %s -verify-machineinstrs -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s +; RUN: llc < %s -start-before=codegenprepare -verify-machineinstrs -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s %struct.anon = type { i32 } @@ -11,26 +12,28 @@ define noundef signext i32 @main() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld r3, L..C0(r2) # @b ; CHECK-NEXT: lwz r3, 0(r3) -; CHECK-NEXT: extsw r4, r3 -; CHECK-NEXT: neg r4, r4 -; CHECK-NEXT: andi. r5, r3, 65535 -; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: andi. 
r4, r3, 65535 ; CHECK-NEXT: bne cr0, L..BB0_4 ; CHECK-NEXT: # %bb.1: # %lor.rhs.i.i -; CHECK-NEXT: xori r5, r4, 1 -; CHECK-NEXT: cmpw r3, r5 +; CHECK-NEXT: extsw r4, r3 +; CHECK-NEXT: neg r5, r4 +; CHECK-NEXT: rldicl r5, r5, 1, 63 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: cmpw r4, r5 ; CHECK-NEXT: crnot 4*cr5+lt, eq -; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: li r4, 1 ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 ; CHECK-NEXT: # %bb.2: # %lor.rhs.i.i -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: L..BB0_3: # %lor.rhs.i.i ; CHECK-NEXT: ld r5, L..C1(r2) # @g -; CHECK-NEXT: stb r3, 0(r5) +; CHECK-NEXT: stb r4, 0(r5) ; CHECK-NEXT: L..BB0_4: # %g.exit -; CHECK-NEXT: ld r5, L..C1(r2) # @g +; CHECK-NEXT: ld r4, L..C1(r2) # @g +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: rldicl r5, r3, 1, 63 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: stb r4, 0(r5) +; CHECK-NEXT: stb r5, 0(r4) ; CHECK-NEXT: blr entry: %0 = load i32, ptr @b, align 4 From da00c60dae0040185dc45039c4397f6e746548e9 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 8 Mar 2024 10:12:51 +0800 Subject: [PATCH 143/158] [C++20] [Modules] Introduce reduced BMI (#75894) Close https://github.com/llvm/llvm-project/issues/71034 See https://discourse.llvm.org/t/rfc-c-20-modules-introduce-thin-bmi-and-decls-hash/74755 This patch introduces reduced BMI, which doesn't contain the definitions of functions and variables if its definitions won't contribute to the ABI. Testing is a big part of the patch. We want to make sure the reduced BMI contains the same behavior with the existing and relatively stable fatBMI. This is pretty helpful for further reduction. The user interfaces part it left to following patches to ease the reviewing. 
--- clang/include/clang/Driver/Options.td | 4 +- .../include/clang/Frontend/FrontendActions.h | 15 ++++++- .../include/clang/Frontend/FrontendOptions.h | 6 ++- clang/include/clang/Serialization/ASTWriter.h | 32 ++++++++++++- clang/lib/Frontend/CompilerInvocation.cpp | 3 ++ clang/lib/Frontend/FrontendActions.cpp | 37 ++++++++++++--- .../ExecuteCompilerInvocation.cpp | 2 + clang/lib/Serialization/ASTWriter.cpp | 32 +++++++------ clang/lib/Serialization/ASTWriterDecl.cpp | 45 ++++++++++++++++--- clang/lib/Serialization/GeneratePCH.cpp | 37 ++++++++++++++- clang/test/CXX/basic/basic.link/p10-ex2.cpp | 2 + .../p4-friend-in-reachable-class.cpp | 5 ++- .../test/Modules/InheritDefaultArguments.cppm | 3 ++ clang/test/Modules/Reachability-Private.cpp | 10 +++++ .../Modules/Reachability-func-default-arg.cpp | 3 ++ clang/test/Modules/Reachability-func-ret.cpp | 3 ++ .../Reachability-template-default-arg.cpp | 3 ++ .../Reachability-template-instantiation.cpp | 4 ++ .../Modules/Reachability-using-templates.cpp | 3 ++ clang/test/Modules/Reachability-using.cpp | 3 ++ clang/test/Modules/concept.cppm | 1 - clang/test/Modules/concept_differ.cppm | 5 +++ clang/test/Modules/ctor.arg.dep.cppm | 4 ++ clang/test/Modules/cxx20-10-1-ex1.cpp | 16 +++++++ clang/test/Modules/cxx20-10-1-ex2.cpp | 36 ++++++++++++--- clang/test/Modules/cxx20-10-2-ex2.cpp | 12 +++++ clang/test/Modules/cxx20-10-2-ex5.cpp | 12 +++++ clang/test/Modules/cxx20-10-3-ex1.cpp | 14 ++++++ clang/test/Modules/cxx20-10-3-ex2.cpp | 10 +++++ clang/test/Modules/cxx20-10-5-ex1.cpp | 12 +++++ .../Modules/cxx20-import-diagnostics-a.cpp | 39 ++++++++++++++++ .../Modules/cxx20-import-diagnostics-b.cpp | 25 +++++++++++ .../Modules/cxx20-module-file-info-macros.cpp | 3 ++ clang/test/Modules/deduction-guide.cppm | 3 ++ clang/test/Modules/deduction-guide2.cppm | 3 ++ clang/test/Modules/deduction-guide3.cppm | 3 ++ clang/test/Modules/derived_class.cpp | 3 ++ ...duplicated-module-file-eq-module-name.cppm | 4 ++ 
clang/test/Modules/enum-class.cppm | 3 ++ .../explicitly-specialized-template.cpp | 3 ++ .../test/Modules/export-language-linkage.cppm | 8 +++- clang/test/Modules/ftime-trace.cppm | 9 ++++ .../inconsistent-deduction-guide-linkage.cppm | 6 +++ clang/test/Modules/inconsistent-export.cppm | 13 ++++++ clang/test/Modules/inherited_arg.cppm | 8 ++++ .../Modules/instantiation-argdep-lookup.cppm | 3 ++ clang/test/Modules/lambdas.cppm | 15 +++++++ .../Modules/merge-concepts-cxx-modules.cpp | 12 +++++ .../Modules/merge-constrained-friends.cpp | 3 ++ clang/test/Modules/merge-lambdas.cppm | 4 ++ .../Modules/merge-requires-with-lambdas.cppm | 19 ++++++++ .../merge-var-template-spec-cxx-modules.cppm | 5 +++ clang/test/Modules/mismatch-diagnostics.cpp | 11 +++++ .../module-init-duplicated-import.cppm | 11 +++++ clang/test/Modules/named-modules-adl-2.cppm | 4 ++ clang/test/Modules/named-modules-adl-3.cppm | 14 ++++++ clang/test/Modules/named-modules-adl.cppm | 3 ++ .../Modules/no-duplicate-codegen-in-GMF.cppm | 10 +++++ clang/test/Modules/pair-unambiguous-ctor.cppm | 9 ++++ .../test/Modules/partial_specialization.cppm | 3 ++ .../test/Modules/placement-new-reachable.cpp | 3 ++ clang/test/Modules/polluted-operator.cppm | 3 ++ clang/test/Modules/pr54457.cppm | 3 ++ clang/test/Modules/pr56916.cppm | 12 +++++ clang/test/Modules/pr58532.cppm | 6 +++ clang/test/Modules/pr58716.cppm | 2 +- clang/test/Modules/pr59719.cppm | 3 ++ clang/test/Modules/pr59780.cppm | 10 +++++ clang/test/Modules/pr59999.cppm | 13 ++++++ clang/test/Modules/pr60036.cppm | 14 ++++++ clang/test/Modules/pr60085.cppm | 17 +++++++ clang/test/Modules/pr60275.cppm | 7 ++- clang/test/Modules/pr60486.cppm | 3 ++ clang/test/Modules/pr60693.cppm | 4 ++ clang/test/Modules/pr60775.cppm | 13 ++++++ clang/test/Modules/pr60890.cppm | 6 +++ clang/test/Modules/pr61065.cppm | 13 ++++++ clang/test/Modules/pr61065_2.cppm | 15 +++++++ clang/test/Modules/pr61067.cppm | 14 ++++++ clang/test/Modules/pr61317.cppm | 9 ++++ 
clang/test/Modules/pr61783.cppm | 8 ++++ clang/test/Modules/pr61892.cppm | 40 ++++++++--------- clang/test/Modules/pr62158.cppm | 9 ++++ clang/test/Modules/pr62359.cppm | 16 +++++++ clang/test/Modules/pr62589.cppm | 3 ++ clang/test/Modules/pr62705.cppm | 8 ++++ clang/test/Modules/pr62796.cppm | 4 ++ clang/test/Modules/pr62943.cppm | 12 +++++ clang/test/Modules/pr63544.cppm | 12 +++++ clang/test/Modules/pr63595.cppm | 10 +++++ clang/test/Modules/pr67627.cppm | 4 ++ clang/test/Modules/pr67893.cppm | 9 ++++ clang/test/Modules/predefined.cpp | 3 ++ clang/test/Modules/preferred_name.cppm | 10 +++++ clang/test/Modules/redefinition-merges.cppm | 6 +++ .../redundant-template-default-arg.cpp | 3 ++ .../redundant-template-default-arg2.cpp | 3 ++ .../redundant-template-default-arg3.cpp | 3 ++ clang/test/Modules/search-partitions.cpp | 16 +++++++ ...unction-definition-for-template-class.cppm | 12 +++++ .../template-function-specialization.cpp | 5 ++- clang/test/Modules/template-lambdas.cppm | 15 +++++++ clang/test/Modules/template-pack.cppm | 3 ++ .../Modules/template_default_argument.cpp | 3 ++ clang/unittests/Sema/SemaNoloadLookupTest.cpp | 9 ++-- .../Serialization/ForceCheckFileInputTest.cpp | 10 +++-- .../Serialization/NoCommentsTest.cpp | 9 ++-- .../Serialization/VarDeclConstantInitTest.cpp | 13 +++--- 108 files changed, 977 insertions(+), 84 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bef38738fde82e..5b3d366dbcf91b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7414,7 +7414,9 @@ def ast_view : Flag<["-"], "ast-view">, def emit_module : Flag<["-"], "emit-module">, HelpText<"Generate pre-compiled module file from a module map">; def emit_module_interface : Flag<["-"], "emit-module-interface">, - HelpText<"Generate pre-compiled module file from a C++ module interface">; + HelpText<"Generate pre-compiled module file from a standard C++ module interface 
unit">; +def emit_reduced_module_interface : Flag<["-"], "emit-reduced-module-interface">, + HelpText<"Generate reduced prebuilt module interface from a standard C++ module interface unit">; def emit_header_unit : Flag<["-"], "emit-header-unit">, HelpText<"Generate C++20 header units from header files">; def emit_pch : Flag<["-"], "emit-pch">, diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index fcce31ac0590ff..8441af2ee3e718 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -118,6 +118,9 @@ class GenerateModuleAction : public ASTFrontendAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) = 0; protected: + std::vector> + CreateMultiplexConsumer(CompilerInstance &CI, StringRef InFile); + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override; @@ -147,8 +150,10 @@ class GenerateModuleFromModuleMapAction : public GenerateModuleAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) override; }; +/// Generates full BMI (which contains full information to generate the object +/// files) for C++20 Named Modules. class GenerateModuleInterfaceAction : public GenerateModuleAction { -private: +protected: bool BeginSourceFileAction(CompilerInstance &CI) override; std::unique_ptr CreateASTConsumer(CompilerInstance &CI, @@ -158,6 +163,14 @@ class GenerateModuleInterfaceAction : public GenerateModuleAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) override; }; +/// Only generates the reduced BMI. This action is mainly used by tests. 
+class GenerateReducedModuleInterfaceAction + : public GenerateModuleInterfaceAction { +private: + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override; +}; + class GenerateHeaderUnitAction : public GenerateModuleAction { private: diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 53a8681cfdbba0..8085dbcbf671a6 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -85,9 +85,13 @@ enum ActionKind { /// Generate pre-compiled module from a module map. GenerateModule, - /// Generate pre-compiled module from a C++ module interface file. + /// Generate pre-compiled module from a standard C++ module interface unit. GenerateModuleInterface, + /// Generate reduced module interface for a standard C++ module interface + /// unit. + GenerateReducedModuleInterface, + /// Generate a C++20 header unit module from a header file. GenerateHeaderUnit, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 5e2f305b294caf..e5db486a71a490 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -166,6 +166,10 @@ class ASTWriter : public ASTDeserializationListener, /// Indicates that the AST contained compiler errors. bool ASTHasCompilerErrors = false; + /// Indicates that we're going to generate the reduced BMI for C++20 + /// named modules. + bool GeneratingReducedBMI = false; + /// Mapping from input file entries to the index into the /// offset table where information about that input file is stored. 
llvm::DenseMap InputFileIDs; @@ -596,7 +600,8 @@ class ASTWriter : public ASTDeserializationListener, ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl &Buffer, InMemoryModuleCache &ModuleCache, ArrayRef> Extensions, - bool IncludeTimestamps = true, bool BuildingImplicitModule = false); + bool IncludeTimestamps = true, bool BuildingImplicitModule = false, + bool GeneratingReducedBMI = false); ~ASTWriter() override; ASTContext &getASTContext() const { @@ -856,6 +861,13 @@ class PCHGenerator : public SemaConsumer { const ASTWriter &getWriter() const { return Writer; } SmallVectorImpl &getPCH() const { return Buffer->Data; } + bool isComplete() const { return Buffer->IsComplete; } + PCHBuffer *getBufferPtr() { return Buffer.get(); } + StringRef getOutputFile() const { return OutputFile; } + DiagnosticsEngine &getDiagnostics() const { + return SemaPtr->getDiagnostics(); + } + public: PCHGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile, StringRef isysroot, @@ -863,7 +875,8 @@ class PCHGenerator : public SemaConsumer { ArrayRef> Extensions, bool AllowASTWithErrors = false, bool IncludeTimestamps = true, bool BuildingImplicitModule = false, - bool ShouldCacheASTInMemory = false); + bool ShouldCacheASTInMemory = false, + bool GeneratingReducedBMI = false); ~PCHGenerator() override; void InitializeSema(Sema &S) override { SemaPtr = &S; } @@ -873,6 +886,21 @@ class PCHGenerator : public SemaConsumer { bool hasEmittedPCH() const { return Buffer->IsComplete; } }; +class ReducedBMIGenerator : public PCHGenerator { +public: + ReducedBMIGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, + StringRef OutputFile, std::shared_ptr Buffer, + bool IncludeTimestamps); + + void HandleTranslationUnit(ASTContext &Ctx) override; +}; + +/// If we can elide the definition of \param D in reduced BMI. +/// +/// Generally, we can elide the definition of a declaration if it won't affect +/// the ABI. 
e.g., the non-inline function bodies. +bool CanElideDeclDef(const Decl *D); + /// A simple helper class to pack several bits in order into (a) 32 bit /// integer(s). class BitsPacker { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 691f3b989b81e5..451bdb9386f587 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2556,6 +2556,8 @@ static const auto &getFrontendActionTable() { {frontend::GenerateModule, OPT_emit_module}, {frontend::GenerateModuleInterface, OPT_emit_module_interface}, + {frontend::GenerateReducedModuleInterface, + OPT_emit_reduced_module_interface}, {frontend::GenerateHeaderUnit, OPT_emit_header_unit}, {frontend::GeneratePCH, OPT_emit_pch}, {frontend::GenerateInterfaceStubs, OPT_emit_interface_stubs}, @@ -4280,6 +4282,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::FixIt: case frontend::GenerateModule: case frontend::GenerateModuleInterface: + case frontend::GenerateReducedModuleInterface: case frontend::GenerateHeaderUnit: case frontend::GeneratePCH: case frontend::GenerateInterfaceStubs: diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index b9ed5dedfa4223..cd9b9923421c69 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -184,12 +184,12 @@ bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) { return true; } -std::unique_ptr -GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, - StringRef InFile) { +std::vector> +GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI, + StringRef InFile) { std::unique_ptr OS = CreateOutputFile(CI, InFile); if (!OS) - return nullptr; + return {}; std::string OutputFile = CI.getFrontendOpts().OutputFile; std::string Sysroot; @@ -210,6 +210,17 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, 
+CI.getFrontendOpts().BuildingImplicitModule)); Consumers.push_back(CI.getPCHContainerWriter().CreatePCHContainerGenerator( CI, std::string(InFile), OutputFile, std::move(OS), Buffer)); + return std::move(Consumers); +} + +std::unique_ptr +GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + std::vector> Consumers = + CreateMultiplexConsumer(CI, InFile); + if (Consumers.empty()) + return nullptr; + return std::make_unique(std::move(Consumers)); } @@ -265,7 +276,12 @@ GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI, CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; - return GenerateModuleAction::CreateASTConsumer(CI, InFile); + std::vector> Consumers = + CreateMultiplexConsumer(CI, InFile); + if (Consumers.empty()) + return nullptr; + + return std::make_unique(std::move(Consumers)); } std::unique_ptr @@ -274,6 +290,16 @@ GenerateModuleInterfaceAction::CreateOutputFile(CompilerInstance &CI, return CI.createDefaultOutputFile(/*Binary=*/true, InFile, "pcm"); } +std::unique_ptr +GenerateReducedModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + auto Buffer = std::make_shared(); + return std::make_unique( + CI.getPreprocessor(), CI.getModuleCache(), + CI.getFrontendOpts().OutputFile, Buffer, + /*IncludeTimestamps=*/+CI.getFrontendOpts().IncludeTimestamps); +} + bool GenerateHeaderUnitAction::BeginSourceFileAction(CompilerInstance &CI) { if (!CI.getLangOpts().CPlusPlusModules) { CI.getDiagnostics().Report(diag::err_module_interface_requires_cpp_modules); @@ -839,7 +865,6 @@ void DumpModuleInfoAction::ExecuteAction() { const LangOptions &LO = getCurrentASTUnit().getLangOpts(); if (LO.CPlusPlusModules && !LO.CurrentModule.empty()) { - ASTReader *R = getCurrentASTUnit().getASTReader().get(); unsigned SubModuleCount = R->getTotalNumSubmodules(); serialization::ModuleFile &MF = 
R->getModuleManager().getPrimaryModule(); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 925879a68cbd09..2446aee571f440 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -65,6 +65,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) { return std::make_unique(); case GenerateModuleInterface: return std::make_unique(); + case GenerateReducedModuleInterface: + return std::make_unique(); case GenerateHeaderUnit: return std::make_unique(); case GeneratePCH: return std::make_unique(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a9edc7e68b53b3..6904c924c2fd3d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4623,10 +4623,12 @@ ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl &Buffer, InMemoryModuleCache &ModuleCache, ArrayRef> Extensions, - bool IncludeTimestamps, bool BuildingImplicitModule) + bool IncludeTimestamps, bool BuildingImplicitModule, + bool GeneratingReducedBMI) : Stream(Stream), Buffer(Buffer), ModuleCache(ModuleCache), IncludeTimestamps(IncludeTimestamps), - BuildingImplicitModule(BuildingImplicitModule) { + BuildingImplicitModule(BuildingImplicitModule), + GeneratingReducedBMI(GeneratingReducedBMI) { for (const auto &Ext : Extensions) { if (auto Writer = Ext->createExtensionWriter(*this)) ModuleFileExtensionWriters.push_back(std::move(Writer)); @@ -5457,18 +5459,20 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { // Add a trailing update record, if any. These must go last because we // lazily load their attached statement. 
- if (HasUpdatedBody) { - const auto *Def = cast(D); - Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION); - Record.push_back(Def->isInlined()); - Record.AddSourceLocation(Def->getInnerLocStart()); - Record.AddFunctionDefinition(Def); - } else if (HasAddedVarDefinition) { - const auto *VD = cast(D); - Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION); - Record.push_back(VD->isInline()); - Record.push_back(VD->isInlineSpecified()); - Record.AddVarDeclInit(VD); + if (!GeneratingReducedBMI || !CanElideDeclDef(D)) { + if (HasUpdatedBody) { + const auto *Def = cast(D); + Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION); + Record.push_back(Def->isInlined()); + Record.AddSourceLocation(Def->getInnerLocStart()); + Record.AddFunctionDefinition(Def); + } else if (HasAddedVarDefinition) { + const auto *VD = cast(D); + Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION); + Record.push_back(VD->isInline()); + Record.push_back(VD->isInlineSpecified()); + Record.AddVarDeclInit(VD); + } } OffsetsRecord.push_back(GetDeclRef(D)); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index e73800100e3ccf..e1862de4a35b8f 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -16,6 +16,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/Expr.h" +#include "clang/AST/ODRHash.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/SourceManager.h" @@ -40,11 +41,14 @@ namespace clang { serialization::DeclCode Code; unsigned AbbrevToUse; + bool GeneratingReducedBMI = false; + public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, - ASTWriter::RecordDataImpl &Record) + ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI) : Writer(Writer), Context(Context), Record(Writer, Record), - Code((serialization::DeclCode)0), AbbrevToUse(0) {} + Code((serialization::DeclCode)0), AbbrevToUse(0), + 
GeneratingReducedBMI(GeneratingReducedBMI) {} uint64_t Emit(Decl *D) { if (!Code) @@ -270,6 +274,27 @@ namespace clang { }; } +bool clang::CanElideDeclDef(const Decl *D) { + if (auto *FD = dyn_cast(D)) { + if (FD->isInlined() || FD->isConstexpr()) + return false; + + if (FD->isDependentContext()) + return false; + } + + if (auto *VD = dyn_cast(D)) { + if (!VD->getDeclContext()->getRedeclContext()->isFileContext() || + VD->isInline() || VD->isConstexpr() || isa(VD)) + return false; + + if (VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) + return false; + } + + return true; +} + void ASTDeclWriter::Visit(Decl *D) { DeclVisitor::Visit(D); @@ -285,9 +310,12 @@ void ASTDeclWriter::Visit(Decl *D) { // have been written. We want it last because we will not read it back when // retrieving it from the AST, we'll just lazily set the offset. if (auto *FD = dyn_cast(D)) { - Record.push_back(FD->doesThisDeclarationHaveABody()); - if (FD->doesThisDeclarationHaveABody()) - Record.AddFunctionDefinition(FD); + if (!GeneratingReducedBMI || !CanElideDeclDef(FD)) { + Record.push_back(FD->doesThisDeclarationHaveABody()); + if (FD->doesThisDeclarationHaveABody()) + Record.AddFunctionDefinition(FD); + } else + Record.push_back(0); } // Similar to FunctionDecls, handle VarDecl's initializer here and write it @@ -295,7 +323,10 @@ void ASTDeclWriter::Visit(Decl *D) { // we have finished recursive deserialization, because it can recursively // refer back to the variable. if (auto *VD = dyn_cast(D)) { - Record.AddVarDeclInit(VD); + if (!GeneratingReducedBMI || !CanElideDeclDef(VD)) + Record.AddVarDeclInit(VD); + else + Record.push_back(0); } // And similarly for FieldDecls. 
We already serialized whether there is a @@ -2729,7 +2760,7 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { assert(ID >= FirstDeclID && "invalid decl ID"); RecordData Record; - ASTDeclWriter W(*this, Context, Record); + ASTDeclWriter W(*this, Context, Record, GeneratingReducedBMI); // Build a record for this declaration W.Visit(D); diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp index cf8084333811f1..2b511b2d5a90a2 100644 --- a/clang/lib/Serialization/GeneratePCH.cpp +++ b/clang/lib/Serialization/GeneratePCH.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "clang/AST/ASTContext.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/SemaConsumer.h" +#include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" @@ -25,11 +27,12 @@ PCHGenerator::PCHGenerator( StringRef OutputFile, StringRef isysroot, std::shared_ptr Buffer, ArrayRef> Extensions, bool AllowASTWithErrors, bool IncludeTimestamps, - bool BuildingImplicitModule, bool ShouldCacheASTInMemory) + bool BuildingImplicitModule, bool ShouldCacheASTInMemory, + bool GeneratingReducedBMI) : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()), SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data), Writer(Stream, this->Buffer->Data, ModuleCache, Extensions, - IncludeTimestamps, BuildingImplicitModule), + IncludeTimestamps, BuildingImplicitModule, GeneratingReducedBMI), AllowASTWithErrors(AllowASTWithErrors), ShouldCacheASTInMemory(ShouldCacheASTInMemory) { this->Buffer->IsComplete = false; @@ -78,3 +81,33 @@ ASTMutationListener *PCHGenerator::GetASTMutationListener() { ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() { return &Writer; } + +ReducedBMIGenerator::ReducedBMIGenerator(const 
Preprocessor &PP, + InMemoryModuleCache &ModuleCache, + StringRef OutputFile, + std::shared_ptr Buffer, + bool IncludeTimestamps) + : PCHGenerator( + PP, ModuleCache, OutputFile, llvm::StringRef(), Buffer, + /*Extensions=*/ArrayRef>(), + /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/IncludeTimestamps, + /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false, + /*GeneratingReducedBMI=*/true) {} + +void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) { + PCHGenerator::HandleTranslationUnit(Ctx); + + if (!isComplete()) + return; + + std::error_code EC; + auto OS = std::make_unique(getOutputFile(), EC); + if (EC) { + getDiagnostics().Report(diag::err_fe_unable_to_open_output) + << getOutputFile() << EC.message() << "\n"; + return; + } + + *OS << getBufferPtr()->Data; + OS->flush(); +} diff --git a/clang/test/CXX/basic/basic.link/p10-ex2.cpp b/clang/test/CXX/basic/basic.link/p10-ex2.cpp index 95fdb56f78d625..e985ce37a93495 100644 --- a/clang/test/CXX/basic/basic.link/p10-ex2.cpp +++ b/clang/test/CXX/basic/basic.link/p10-ex2.cpp @@ -5,7 +5,9 @@ // // RUN: %clang_cc1 -std=c++20 M.cpp -fsyntax-only -DTEST_INTERFACE -verify // RUN: %clang_cc1 -std=c++20 M.cpp -emit-module-interface -o M.pcm +// RUN: %clang_cc1 -std=c++20 M.cpp -emit-reduced-module-interface -o M.reduced.pcm // RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.pcm -verify +// RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.reduced.pcm -verify //--- decls.h int f(); // #1, attached to the global module diff --git a/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp b/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp index 638057cbd681f0..3c120654f2ee5d 100644 --- a/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp +++ b/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp @@ -8,7 +8,10 @@ // RUN: 
split-file %s %t // // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Friend-in-reachable-class.cppm -o %t/X.pcm -// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Friend-in-reachable-class.cppm \ +// RUN: -o %t/X.reduced.pcm +// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.pcm %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.reduced.pcm %t/Use.cpp -verify -fsyntax-only // //--- Friend-in-reachable-class.cppm module; diff --git a/clang/test/Modules/InheritDefaultArguments.cppm b/clang/test/Modules/InheritDefaultArguments.cppm index 0afb46319ff850..0ef6390204c4b9 100644 --- a/clang/test/Modules/InheritDefaultArguments.cppm +++ b/clang/test/Modules/InheritDefaultArguments.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only + //--- foo.h template class Templ; diff --git a/clang/test/Modules/Reachability-Private.cpp b/clang/test/Modules/Reachability-Private.cpp index 9a7c3ba231f179..3ce108dc5c5509 100644 --- a/clang/test/Modules/Reachability-Private.cpp +++ b/clang/test/Modules/Reachability-Private.cpp @@ -9,6 +9,16 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \ // RUN: -DTEST_BADINLINE -verify -fsyntax-only +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/Private.cppm -emit-reduced-module-interface \ +// RUN: -o %t/Private.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \ +// RUN: -DTEST_BADINLINE -verify -fsyntax-only + //--- Private.cppm export module Private; #ifdef TEST_BADINLINE diff --git a/clang/test/Modules/Reachability-func-default-arg.cpp b/clang/test/Modules/Reachability-func-default-arg.cpp index 0d6d8655d53293..bc0cafdebb7a4e 100644 --- a/clang/test/Modules/Reachability-func-default-arg.cpp +++ b/clang/test/Modules/Reachability-func-default-arg.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/func_default_arg.cppm -emit-module-interface -o %t/func_default_arg.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/func_default_arg.cppm -emit-reduced-module-interface -o %t/func_default_arg.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // //--- func_default_arg.cppm export module func_default_arg; diff --git a/clang/test/Modules/Reachability-func-ret.cpp b/clang/test/Modules/Reachability-func-ret.cpp index ca5bbc68d759f9..7d34387726f683 100644 --- a/clang/test/Modules/Reachability-func-ret.cpp +++ b/clang/test/Modules/Reachability-func-ret.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/func_ret.cppm -emit-module-interface -o %t/func_ret.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/func_ret.cppm -emit-reduced-module-interface -o %t/func_ret.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // //--- func_ret.cppm export module func_ret; diff --git a/clang/test/Modules/Reachability-template-default-arg.cpp b/clang/test/Modules/Reachability-template-default-arg.cpp index 6fb109e41fcf0a..35c647d0d344ba 100644 --- 
a/clang/test/Modules/Reachability-template-default-arg.cpp +++ b/clang/test/Modules/Reachability-template-default-arg.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/template_default_arg.cppm -emit-module-interface -o %t/template_default_arg.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/template_default_arg.cppm -emit-reduced-module-interface -o %t/template_default_arg.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- template_default_arg.cppm export module template_default_arg; diff --git a/clang/test/Modules/Reachability-template-instantiation.cpp b/clang/test/Modules/Reachability-template-instantiation.cpp index 2170c7b92a370a..6f363ed00b6e36 100644 --- a/clang/test/Modules/Reachability-template-instantiation.cpp +++ b/clang/test/Modules/Reachability-template-instantiation.cpp @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/Use.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cppm -fprebuilt-module-path=%t -emit-reduced-module-interface -o %t/Use.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cpp -verify -fsyntax-only // //--- Templ.h #ifndef TEMPL_H diff --git a/clang/test/Modules/Reachability-using-templates.cpp b/clang/test/Modules/Reachability-using-templates.cpp index f530e15bd4d2ba..65601c1cfe4e2d 100644 --- a/clang/test/Modules/Reachability-using-templates.cpp +++ b/clang/test/Modules/Reachability-using-templates.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/mod.templates.cppm -emit-module-interface -o %t/mod.templates.pcm // RUN: %clang_cc1 -std=c++20 
-fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/mod.templates.cppm -emit-reduced-module-interface -o %t/mod.templates.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- mod.templates.cppm export module mod.templates; diff --git a/clang/test/Modules/Reachability-using.cpp b/clang/test/Modules/Reachability-using.cpp index 642b97dd8432c3..8301bfbedf8704 100644 --- a/clang/test/Modules/Reachability-using.cpp +++ b/clang/test/Modules/Reachability-using.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/mod.cppm -emit-module-interface -o %t/mod.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/mod.cppm -emit-reduced-module-interface -o %t/mod.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- mod.cppm export module mod; diff --git a/clang/test/Modules/concept.cppm b/clang/test/Modules/concept.cppm index 0fdb5ea8968085..4464cf7c0a416c 100644 --- a/clang/test/Modules/concept.cppm +++ b/clang/test/Modules/concept.cppm @@ -11,7 +11,6 @@ // RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf -fprebuilt-module-path=%t -I%t \ // RUN: -DDIFFERENT -DSKIP_ODR_CHECK_IN_GMF %t/B.cppm -verify - //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/concept_differ.cppm b/clang/test/Modules/concept_differ.cppm index ccb29d26e53d13..525ee2d4edcc8e 100644 --- a/clang/test/Modules/concept_differ.cppm +++ b/clang/test/Modules/concept_differ.cppm @@ -5,6 +5,11 @@ // RUN: %clang_cc1 -x c++ -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -x c++ -std=c++20 %t/B.cppm -I%t -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t %t/foo.cpp -verify +// +// RUN: rm %t/A.pcm %t/B.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm 
+// RUN: %clang_cc1 -x c++ -std=c++20 %t/B.cppm -I%t -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t %t/foo.cpp -verify //--- foo.h template diff --git a/clang/test/Modules/ctor.arg.dep.cppm b/clang/test/Modules/ctor.arg.dep.cppm index 0e5b1a694f6a5e..10924bfe0f1bdc 100644 --- a/clang/test/Modules/ctor.arg.dep.cppm +++ b/clang/test/Modules/ctor.arg.dep.cppm @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // +// RUN: rm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// //--- foo.h namespace ns { diff --git a/clang/test/Modules/cxx20-10-1-ex1.cpp b/clang/test/Modules/cxx20-10-1-ex1.cpp index b330e0a6c9a9d8..4445b19ea86cf4 100644 --- a/clang/test/Modules/cxx20-10-1-ex1.cpp +++ b/clang/test/Modules/cxx20-10-1-ex1.cpp @@ -19,6 +19,22 @@ // RUN: -fmodule-file=A=%t/A.pcm -fmodule-file=A:Foo=%t/A_Foo.pcm \ // RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm -o %t/ex1.o +// RUN: rm %t/A_Internals.pcm %t/A_Foo.pcm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu1.cpp \ +// RUN: -o %t/A_Internals.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu2.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm -o %t/A_Foo.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu3.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm \ +// RUN: -fmodule-file=A:Foo=%t/A_Foo.pcm -o %t/A.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex1-tu4.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm \ +// RUN: -fmodule-file=A:Foo=%t/A_Foo.pcm \ +// RUN: -fmodule-file=A=%t/A.pcm -o %t/ex1.o + // expected-no-diagnostics //--- 
std10-1-ex1-tu1.cpp diff --git a/clang/test/Modules/cxx20-10-1-ex2.cpp b/clang/test/Modules/cxx20-10-1-ex2.cpp index 8b908d5fa2eda6..fc61d89926d448 100644 --- a/clang/test/Modules/cxx20-10-1-ex2.cpp +++ b/clang/test/Modules/cxx20-10-1-ex2.cpp @@ -5,26 +5,50 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu1.cpp \ // RUN: -o %t/B_Y.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu2.cpp \ // RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu3.cpp \ // RUN: -o %t/B_X1.pcm -verify - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu4.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X2.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex2-tu5.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu5.o - +// // RUN: %clang_cc1 -std=c++20 -S %t/std10-1-ex2-tu6.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu6.s -verify - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu7.cpp \ // RUN: -fmodule-file=B:X2=%t/B_X2.pcm -fmodule-file=B=%t/B.pcm \ // RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X3.pcm -verify +// Test again with reduced BMI. 
+// RUN: rm %t/B_X2.pcm %t/B.pcm %t/B_Y.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu1.cpp \ +// RUN: -o %t/B_Y.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu2.cpp \ +// RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu3.cpp \ +// RUN: -o %t/B_X1.pcm -verify +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu4.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X2.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex2-tu5.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu5.o +// +// RUN: %clang_cc1 -std=c++20 -S %t/std10-1-ex2-tu6.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu6.s -verify +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu7.cpp \ +// RUN: -fmodule-file=B:X2=%t/B_X2.pcm -fmodule-file=B=%t/B.pcm \ +// RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X3.pcm -verify + //--- std10-1-ex2-tu1.cpp module B:Y; int y(); diff --git a/clang/test/Modules/cxx20-10-2-ex2.cpp b/clang/test/Modules/cxx20-10-2-ex2.cpp index bc66d6a2ec1a92..b48d96478b9a65 100644 --- a/clang/test/Modules/cxx20-10-2-ex2.cpp +++ b/clang/test/Modules/cxx20-10-2-ex2.cpp @@ -14,6 +14,18 @@ // RUN: -fmodule-file=%t/std-10-2-ex2-c.pcm -fmodule-file=X=%t/X.pcm \ // RUN: -pedantic-errors -verify -o %t/M.pcm +// Test again with reduced BMI. 
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -I %t \ +// RUN: -xc++-user-header std-10-2-ex2-c.h -o %t/std-10-2-ex2-c.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex2-tu1.cpp \ +// RUN: -o %t/X.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex2-tu2.cpp \ +// RUN: -fmodule-file=%t/std-10-2-ex2-c.pcm -fmodule-file=X=%t/X.pcm \ +// RUN: -pedantic-errors -verify -o %t/M.pcm + + //--- std-10-2-ex2-b.h int f(); diff --git a/clang/test/Modules/cxx20-10-2-ex5.cpp b/clang/test/Modules/cxx20-10-2-ex5.cpp index 49c5934c8f2172..f222568072393f 100644 --- a/clang/test/Modules/cxx20-10-2-ex5.cpp +++ b/clang/test/Modules/cxx20-10-2-ex5.cpp @@ -13,6 +13,18 @@ // RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu3.cpp \ // RUN: -fmodule-file=M=%t/M.pcm -verify -o %t/main.o +// Test again with reduced BMI. +// RUN: rm %t/M.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex5-tu1.cpp \ +// RUN: -o %t/M.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu2.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -o %t/tu-2.o + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu3.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -verify -o %t/main.o + + //--- std-10-2-ex5-tu1.cpp export module M; export struct X { diff --git a/clang/test/Modules/cxx20-10-3-ex1.cpp b/clang/test/Modules/cxx20-10-3-ex1.cpp index 5d6e2554f753b0..99b88c7e442ffd 100644 --- a/clang/test/Modules/cxx20-10-3-ex1.cpp +++ b/clang/test/Modules/cxx20-10-3-ex1.cpp @@ -14,6 +14,20 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-3-ex1-tu4.cpp \ // RUN: -fmodule-file=M:Part=%t/M_Part.pcm -o %t/M.pcm +// Test again with reduced BMI. 
+// RUN: rm %t/M_PartImpl.pcm %t/M.pcm %t/M_Part.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu1.cpp \ +// RUN: -o %t/M_PartImpl.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu2.cpp \ +// RUN: -fmodule-file=M:PartImpl=%t/M_PartImpl.pcm -o %t/M.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu3.cpp \ +// RUN: -o %t/M_Part.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu4.cpp \ +// RUN: -fmodule-file=M:Part=%t/M_Part.pcm -o %t/M.pcm + //--- std10-3-ex1-tu1.cpp module M:PartImpl; diff --git a/clang/test/Modules/cxx20-10-3-ex2.cpp b/clang/test/Modules/cxx20-10-3-ex2.cpp index b1d6d669c0a0e6..40566c00f578c2 100644 --- a/clang/test/Modules/cxx20-10-3-ex2.cpp +++ b/clang/test/Modules/cxx20-10-3-ex2.cpp @@ -11,6 +11,16 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-3-ex2-tu3.cpp \ // RUN: -o %t/M.pcm -verify +// Test again with reduced BMI. +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex2-tu1.cpp \ +// RUN: -o %t/M.pcm + +// RUN: %clang_cc1 -std=c++20 -S %t/std10-3-ex2-tu2.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -o %t/tu_8.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex2-tu3.cpp \ +// RUN: -o %t/M.pcm -verify + //--- std10-3-ex2-tu1.cpp export module M; diff --git a/clang/test/Modules/cxx20-10-5-ex1.cpp b/clang/test/Modules/cxx20-10-5-ex1.cpp index a83162c5c15017..0435b3a64c075d 100644 --- a/clang/test/Modules/cxx20-10-5-ex1.cpp +++ b/clang/test/Modules/cxx20-10-5-ex1.cpp @@ -11,6 +11,18 @@ // RUN: %clang_cc1 -std=c++20 std-10-5-ex1-use.cpp -fmodule-file=A=A.pcm \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm A.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface std-10-5-ex1-interface.cpp \ +// RUN: -DBAD_FWD_DECL -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface std-10-5-ex1-interface.cpp \ +// RUN: -o A.pcm + +// RUN: %clang_cc1 -std=c++20 std-10-5-ex1-use.cpp -fmodule-file=A=A.pcm \ +// RUN: -fsyntax-only -verify + + //--- std-10-5-ex1-interface.cpp export module A; diff --git a/clang/test/Modules/cxx20-import-diagnostics-a.cpp b/clang/test/Modules/cxx20-import-diagnostics-a.cpp index a5cf44ed82d5ff..1b38259e0358c0 100644 --- a/clang/test/Modules/cxx20-import-diagnostics-a.cpp +++ b/clang/test/Modules/cxx20-import-diagnostics-a.cpp @@ -36,6 +36,45 @@ // RUN: %clang_cc1 -std=c++20 -emit-obj %t/import-diags-tu11.cpp \ // RUN: -fmodule-file=C=%t/C.pcm -o %t/impl.o +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu1.cpp \ +// RUN: -o %t/B.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu2.cpp \ +// RUN: -o %t/C.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu3.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/AOK1.pcm + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu4.cpp \ +// RUN: -fmodule-file=AOK1=%t/AOK1.pcm -fmodule-file=B=%t/B.pcm \ +// RUN: -fmodule-file=C=%t/C.pcm -o %t/tu_3.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu5.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/BC.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu6.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/tu_5.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu7.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/D.pcm -verify + +// RUN: %clang_cc1 -std=c++20 
-emit-reduced-module-interface %t/import-diags-tu8.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/D.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu9.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/tu_8.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu10.cpp \ +// RUN: -o %t/B.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/import-diags-tu11.cpp \ +// RUN: -fmodule-file=C=%t/C.pcm -o %t/impl.o + // Test diagnostics for incorrect module import sequences. //--- import-diags-tu1.cpp diff --git a/clang/test/Modules/cxx20-import-diagnostics-b.cpp b/clang/test/Modules/cxx20-import-diagnostics-b.cpp index 7d432633552a25..db522d7babd3ae 100644 --- a/clang/test/Modules/cxx20-import-diagnostics-b.cpp +++ b/clang/test/Modules/cxx20-import-diagnostics-b.cpp @@ -22,6 +22,31 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/g.cpp \ // RUN: -fmodule-file=a=%t/a.pcm -o %t/g.pcm -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cpp -o %t/a.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/c.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/c.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/d.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/d.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/e.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/e.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a-part.cpp \ +// RUN: -o %t/a-part.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/f.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/f.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/g.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/g.pcm -verify + //--- a.cpp export module a; diff --git a/clang/test/Modules/cxx20-module-file-info-macros.cpp 
b/clang/test/Modules/cxx20-module-file-info-macros.cpp index bc7df1c9f50b59..3b67e9b9acd410 100644 --- a/clang/test/Modules/cxx20-module-file-info-macros.cpp +++ b/clang/test/Modules/cxx20-module-file-info-macros.cpp @@ -17,6 +17,9 @@ // RUN: %clang_cc1 -std=c++20 %t/named_module.cppm -emit-module-interface -o %t/M.pcm // RUN: %clang_cc1 -module-file-info %t/M.pcm | FileCheck %t/named_module.cppm +// RUN: %clang_cc1 -std=c++20 %t/named_module.cppm -emit-reduced-module-interface -o %t/M.pcm +// RUN: %clang_cc1 -module-file-info %t/M.pcm | FileCheck %t/named_module.cppm + //--- foo.h #pragma once #define FOO diff --git a/clang/test/Modules/deduction-guide.cppm b/clang/test/Modules/deduction-guide.cppm index 9c959a71365dac..02ac2c0053cff5 100644 --- a/clang/test/Modules/deduction-guide.cppm +++ b/clang/test/Modules/deduction-guide.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- foo.h template class Templ { diff --git a/clang/test/Modules/deduction-guide2.cppm b/clang/test/Modules/deduction-guide2.cppm index a163c365683101..889670b973f0d3 100644 --- a/clang/test/Modules/deduction-guide2.cppm +++ b/clang/test/Modules/deduction-guide2.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- Templ.cppm export module Templ; export template diff --git a/clang/test/Modules/deduction-guide3.cppm 
b/clang/test/Modules/deduction-guide3.cppm index 8fa08a0625d7c8..1165dd40bcfb8c 100644 --- a/clang/test/Modules/deduction-guide3.cppm +++ b/clang/test/Modules/deduction-guide3.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- Templ.cppm export module Templ; template diff --git a/clang/test/Modules/derived_class.cpp b/clang/test/Modules/derived_class.cpp index ee9e0ae4637ec7..e0c5a652eba4ea 100644 --- a/clang/test/Modules/derived_class.cpp +++ b/clang/test/Modules/derived_class.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- bar.h struct bar_base { diff --git a/clang/test/Modules/duplicated-module-file-eq-module-name.cppm b/clang/test/Modules/duplicated-module-file-eq-module-name.cppm index e86dbe2b941ef8..57ffb560ab540a 100644 --- a/clang/test/Modules/duplicated-module-file-eq-module-name.cppm +++ b/clang/test/Modules/duplicated-module-file-eq-module-name.cppm @@ -8,6 +8,10 @@ // RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/u.cpp -fmodule-file=a=%t/unexist.pcm \ // RUN: -fmodule-file=a=%t/a.pcm -verify -fsyntax-only +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/u.cpp -fmodule-file=a=%t/unexist.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm -verify -fsyntax-only //--- a.cppm export 
module a; diff --git a/clang/test/Modules/enum-class.cppm b/clang/test/Modules/enum-class.cppm index 01ae8c0d8814da..992eb9d5e55100 100644 --- a/clang/test/Modules/enum-class.cppm +++ b/clang/test/Modules/enum-class.cppm @@ -6,6 +6,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only //--- foo.h enum class foo { diff --git a/clang/test/Modules/explicitly-specialized-template.cpp b/clang/test/Modules/explicitly-specialized-template.cpp index 89677254ea739a..2450bbe31bd9b7 100644 --- a/clang/test/Modules/explicitly-specialized-template.cpp +++ b/clang/test/Modules/explicitly-specialized-template.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/X.cppm -emit-module-interface -o %t/X.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // +// RUN: %clang_cc1 -std=c++20 %t/X.cppm -emit-reduced-module-interface -o %t/X.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify +// //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/export-language-linkage.cppm b/clang/test/Modules/export-language-linkage.cppm index 462b28d36cb44b..f389d9604ef3a8 100644 --- a/clang/test/Modules/export-language-linkage.cppm +++ b/clang/test/Modules/export-language-linkage.cppm @@ -8,6 +8,11 @@ // RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm // RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fmodule-file=c=%t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fsyntax-only -verify +// RUN: %clang_cc1 
-module-file-info %t/a.pcm | FileCheck %t/a.cppm + //--- a.cppm export module a; export extern "C++" int foo() { return 43; } @@ -43,6 +48,7 @@ int use() { } //--- c.cppm +// expected-no-diagnostics export module c; extern "C++" { export int f(); @@ -59,5 +65,5 @@ int use() { int use_of_nonexported() { return h(); // expected-error {{declaration of 'h' must be imported from module 'c' before it is required}} - // expected-note@c.cppm:4 {{declaration here is not visible}} + // expected-note@c.cppm:5 {{declaration here is not visible}} } diff --git a/clang/test/Modules/ftime-trace.cppm b/clang/test/Modules/ftime-trace.cppm index 48cd4113ec7826..8882e85be15156 100644 --- a/clang/test/Modules/ftime-trace.cppm +++ b/clang/test/Modules/ftime-trace.cppm @@ -9,5 +9,14 @@ // RUN: %clang_cc1 -std=c++20 %t/a.pcm -ftime-trace=%t/a.json -o - // RUN: ls %t | grep "a.json" +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.pcm -ftime-trace=%t/a.json -o - +// RUN: ls %t | grep "a.json" + //--- a.cppm export module a; diff --git a/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm b/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm index abcbec07f97de0..3991e47ce21513 100644 --- a/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm +++ b/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -std=c++20 %t/D.cppm -I%t -emit-module-interface -o %t/D.pcm // RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/D-part.cppm -I%t -fprebuilt-module-path=%t -verify +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -I%t -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/A.cppm -I%t -fprebuilt-module-path=%t -verify +// +// RUN: %clang_cc1 -std=c++20 %t/D.cppm -I%t -emit-reduced-module-interface -o %t/D.pcm +// RUN: %clang_cc1 
-std=c++20 -fsyntax-only %t/D-part.cppm -I%t -fprebuilt-module-path=%t -verify + //--- A.cppm module; export module baz:A; diff --git a/clang/test/Modules/inconsistent-export.cppm b/clang/test/Modules/inconsistent-export.cppm index 5e94d2b37b7578..0c74ba9037702a 100644 --- a/clang/test/Modules/inconsistent-export.cppm +++ b/clang/test/Modules/inconsistent-export.cppm @@ -9,6 +9,19 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -emit-obj +// Test again with reduced BMI. +// RUN: rm -fr %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/m-a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/m-b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -emit-obj + + //--- a.cppm export module m:a; namespace n { diff --git a/clang/test/Modules/inherited_arg.cppm b/clang/test/Modules/inherited_arg.cppm index eb66b70cdce336..a9b6efabb1e6f7 100644 --- a/clang/test/Modules/inherited_arg.cppm +++ b/clang/test/Modules/inherited_arg.cppm @@ -7,6 +7,14 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// Test again with reduced BMI. 
+// +// RUN: %clang_cc1 -std=c++20 %t/A-B.cppm -I%t -emit-reduced-module-interface -o %t/A-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/A-C.cppm -I%t -emit-reduced-module-interface -o %t/A-C.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + + //--- foo.h template class pair {}; diff --git a/clang/test/Modules/instantiation-argdep-lookup.cppm b/clang/test/Modules/instantiation-argdep-lookup.cppm index fc9009a5bc13d5..62dabfb6efddcf 100644 --- a/clang/test/Modules/instantiation-argdep-lookup.cppm +++ b/clang/test/Modules/instantiation-argdep-lookup.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// //--- foo.h namespace ns { diff --git a/clang/test/Modules/lambdas.cppm b/clang/test/Modules/lambdas.cppm index 7f00cf6f8682ac..be614b0519161a 100644 --- a/clang/test/Modules/lambdas.cppm +++ b/clang/test/Modules/lambdas.cppm @@ -11,6 +11,21 @@ // RUN: -o %t/lambdas2.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ // RUN: -verify -DUSE_LAMBDA2 +// +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/lambdas.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify +// +// RUN: %clang_cc1 -std=c++20 %t/lambdas2.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify -DUSE_LAMBDA2 //--- lambdas.h auto l1 = []() constexpr -> int { diff --git a/clang/test/Modules/merge-concepts-cxx-modules.cpp b/clang/test/Modules/merge-concepts-cxx-modules.cpp index 3d4f8435531a88..0127e8baad6b94 100644 --- a/clang/test/Modules/merge-concepts-cxx-modules.cpp +++ b/clang/test/Modules/merge-concepts-cxx-modules.cpp @@ -8,6 +8,18 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/conflicting.cppm -o %t/conflicting.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cppm -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/same_as.cppm -o %t/same_as.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/concepts.cppm -o %t/concepts.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/format.cppm -o %t/format.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/conflicting.cppm -o %t/conflicting.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cppm -fsyntax-only -verify + + //--- same_as.cppm export module same_as; export template diff --git a/clang/test/Modules/merge-constrained-friends.cpp b/clang/test/Modules/merge-constrained-friends.cpp index 8f0e9ed83cf296..d0317b99801e97 100644 --- a/clang/test/Modules/merge-constrained-friends.cpp +++ b/clang/test/Modules/merge-constrained-friends.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++23 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++23 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++23 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++23 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- A.cppm module; export module A; diff --git a/clang/test/Modules/merge-lambdas.cppm b/clang/test/Modules/merge-lambdas.cppm index a1d04ab4e234dd..4363e452c2bcd3 100644 --- a/clang/test/Modules/merge-lambdas.cppm +++ b/clang/test/Modules/merge-lambdas.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -fsyntax-only -verify + //--- 
lambda.h inline auto cmp = [](auto l, auto r) { return l < r; diff --git a/clang/test/Modules/merge-requires-with-lambdas.cppm b/clang/test/Modules/merge-requires-with-lambdas.cppm index 5767492047684b..c4d6e0539f41ea 100644 --- a/clang/test/Modules/merge-requires-with-lambdas.cppm +++ b/clang/test/Modules/merge-requires-with-lambdas.cppm @@ -17,6 +17,25 @@ // RUN: %clang_cc1 -std=c++20 %t/A3.cppm -emit-module-interface -o %t/A3.pcm // RUN: %clang_cc1 -std=c++20 %t/TestA3.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A0.cppm -emit-reduced-module-interface -o %t/A0.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A1.cppm -emit-reduced-module-interface -o %t/A1.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA1.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A2.cppm -emit-reduced-module-interface -o %t/A2.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA2.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A3.cppm -emit-reduced-module-interface -o %t/A3.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA3.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + + //--- A.h template concept A = requires(const _Tp& __t) { [](const __Up&) {}(__t); }; diff --git a/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm b/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm index a451bfe7804d33..db3f4cd5187169 100644 --- a/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm +++ b/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm @@ -7,6 +7,11 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fprebuilt-module-path=%t %t/reexport2.cppm -o %t/reexport2.pcm // RUN: %clang_cc1 
-std=c++20 -fprebuilt-module-path=%t %t/use.cppm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/var_def.cppm -o %t/var_def.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/reexport1.cppm -o %t/reexport1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/reexport2.cppm -o %t/reexport2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/use.cppm -fsyntax-only -verify + //--- use.cppm import reexport1; import reexport2; diff --git a/clang/test/Modules/mismatch-diagnostics.cpp b/clang/test/Modules/mismatch-diagnostics.cpp index f8ce987cfba572..5a026aa1f6c020 100644 --- a/clang/test/Modules/mismatch-diagnostics.cpp +++ b/clang/test/Modules/mismatch-diagnostics.cpp @@ -13,6 +13,17 @@ // RUN: -fprebuilt-module-path=%t/prebuilt_modules -DCHECK_MISMATCH \ // RUN: %t/use.cpp 2>&1 | FileCheck %s +// Test again with reduced BMI. +// RUN: %clang_cc1 -triple %itanium_abi_triple \ +// RUN: -std=c++20 -fprebuilt-module-path=%t/prebuilt-modules \ +// RUN: -emit-reduced-module-interface -pthread -DBUILD_MODULE \ +// RUN: %t/mismatching_module.cppm -o \ +// RUN: %t/prebuilt_modules/mismatching_module.pcm +// +// RUN: not %clang_cc1 -triple %itanium_abi_triple -std=c++20 \ +// RUN: -fprebuilt-module-path=%t/prebuilt_modules -DCHECK_MISMATCH \ +// RUN: %t/use.cpp 2>&1 | FileCheck %s + //--- mismatching_module.cppm export module mismatching_module; diff --git a/clang/test/Modules/module-init-duplicated-import.cppm b/clang/test/Modules/module-init-duplicated-import.cppm index 7adce11779566e..1326402bb4ded3 100644 --- a/clang/test/Modules/module-init-duplicated-import.cppm +++ b/clang/test/Modules/module-init-duplicated-import.cppm @@ -9,6 +9,17 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ // RUN: -fmodule-file=a=%t/a.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm +// Test again with reduced BMI. 
+// Note that we can't use reduced BMI here for m.cppm since it is required +// to generate the backend code. +// RUN: rm %t/a.pcm %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \ +// RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm + //--- a.cppm export module a; export struct A { diff --git a/clang/test/Modules/named-modules-adl-2.cppm b/clang/test/Modules/named-modules-adl-2.cppm index 655acfcd93f69a..a14b9a68d74e41 100644 --- a/clang/test/Modules/named-modules-adl-2.cppm +++ b/clang/test/Modules/named-modules-adl-2.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-module-interface -o %t/b.pcm // RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm -fsyntax-only -verify + //--- a.cppm export module a; diff --git a/clang/test/Modules/named-modules-adl-3.cppm b/clang/test/Modules/named-modules-adl-3.cppm index 2fc2962c926b1b..d70946fa068b3a 100644 --- a/clang/test/Modules/named-modules-adl-3.cppm +++ b/clang/test/Modules/named-modules-adl-3.cppm @@ -14,6 +14,20 @@ // RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/c.cppm -fmodule-file=a=%t/a.pcm \ // RUN: -fmodule-file=b=%t/b.pcm -fsyntax-only -verify +// Test again with reduced BMI. 
+// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-reduced-module-interface \ +// RUN: -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm \ +// RUN: -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/b.cppm -fmodule-file=a=%t/a.pcm \ +// RUN: -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/c.cppm -fmodule-file=a=%t/a.pcm \ +// RUN: -fmodule-file=b=%t/b.pcm -fsyntax-only -verify + //--- foo.h namespace n { diff --git a/clang/test/Modules/named-modules-adl.cppm b/clang/test/Modules/named-modules-adl.cppm index d5133ef367265a..ef250023f91e75 100644 --- a/clang/test/Modules/named-modules-adl.cppm +++ b/clang/test/Modules/named-modules-adl.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -fsyntax-only -verify + //--- a.h namespace n { diff --git a/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm b/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm index a743b64cb18d6e..36a2d8bc8c95ce 100644 --- a/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm +++ b/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm @@ -10,6 +10,16 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.pcm -S -emit-llvm -o - \ // RUN: -fprebuilt-module-path=%t | FileCheck %t/B.cppm +// Test again with reduced BMI. Note that we need to generate full BMI for B.cppm +// since it is required to generate backend codes. 
+// RUN: rm %t/A.pcm %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.cppm -emit-module-interface -o %t/B.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.pcm -S -emit-llvm -o - \ +// RUN: -fprebuilt-module-path=%t | FileCheck %t/B.cppm + + //--- foo.h template diff --git a/clang/test/Modules/pair-unambiguous-ctor.cppm b/clang/test/Modules/pair-unambiguous-ctor.cppm index eb242244260cbd..24fb15959577b0 100644 --- a/clang/test/Modules/pair-unambiguous-ctor.cppm +++ b/clang/test/Modules/pair-unambiguous-ctor.cppm @@ -10,6 +10,15 @@ // RUN: %clang_cc1 -std=c++20 %t/algorithm.cppm -I%t -emit-module-interface -o %t/std-algorithm.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cppm -I%t -fprebuilt-module-path=%t -emit-module-interface -verify -o %t/Use.pcm +// Test again with reduced BMI. +// RUN: rm -fr %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/string.cppm -I%t -emit-reduced-module-interface -o %t/std-string.pcm +// RUN: %clang_cc1 -std=c++20 %t/algorithm.cppm -I%t -emit-reduced-module-interface -o %t/std-algorithm.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cppm -I%t -fprebuilt-module-path=%t -emit-reduced-module-interface -verify -o %t/Use.pcm + //--- Use.cppm // expected-no-diagnostics module; diff --git a/clang/test/Modules/partial_specialization.cppm b/clang/test/Modules/partial_specialization.cppm index 3a01857172112e..1d65a375643a28 100644 --- a/clang/test/Modules/partial_specialization.cppm +++ b/clang/test/Modules/partial_specialization.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.cppm -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 
-fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify +// //--- foo.h template inline constexpr bool IsSame = false; diff --git a/clang/test/Modules/placement-new-reachable.cpp b/clang/test/Modules/placement-new-reachable.cpp index 29263173d78f45..6b495a60306bc1 100644 --- a/clang/test/Modules/placement-new-reachable.cpp +++ b/clang/test/Modules/placement-new-reachable.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- placement.h namespace std { using size_t = decltype(sizeof(0)); diff --git a/clang/test/Modules/polluted-operator.cppm b/clang/test/Modules/polluted-operator.cppm index 721ca061c939f4..2179fa098064ae 100644 --- a/clang/test/Modules/polluted-operator.cppm +++ b/clang/test/Modules/polluted-operator.cppm @@ -11,6 +11,9 @@ // RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/b.cppm -fprebuilt-module-path=%t \ // RUN: -emit-module-interface -DSKIP_ODR_CHECK_IN_GMF -o %t/b.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -emit-reduced-module-interface -o %t/b.pcm -verify + //--- foo.h namespace std diff --git a/clang/test/Modules/pr54457.cppm b/clang/test/Modules/pr54457.cppm index ed67ec1065376e..d55bdfbf3b7582 100644 --- a/clang/test/Modules/pr54457.cppm +++ b/clang/test/Modules/pr54457.cppm @@ -9,6 +9,9 @@ // RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -o %t/C.pcm // RUN: %clang_cc1 -std=c++20 %t/UseC.cppm -fprebuilt-module-path=%t -verify -S -o - +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -o %t/C.pcm +// RUN: %clang_cc1 -std=c++20 %t/UseC.cppm -fprebuilt-module-path=%t -verify 
-S -o - + //--- A.cppm // expected-no-diagnostics export module A; diff --git a/clang/test/Modules/pr56916.cppm b/clang/test/Modules/pr56916.cppm index a435b06d5cf152..09cea6720427b3 100644 --- a/clang/test/Modules/pr56916.cppm +++ b/clang/test/Modules/pr56916.cppm @@ -8,6 +8,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fsyntax-only -fprebuilt-module-path=%t -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/M-A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/M-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o %t/M.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fsyntax-only -fprebuilt-module-path=%t -verify + + //--- foo.h template class Templ { diff --git a/clang/test/Modules/pr58532.cppm b/clang/test/Modules/pr58532.cppm index cf530b4ac2ccce..35bebb41431e7b 100644 --- a/clang/test/Modules/pr58532.cppm +++ b/clang/test/Modules/pr58532.cppm @@ -7,6 +7,12 @@ // RUN: %clang_cc1 -std=c++20 %t/implementation.cpp -fmodule-file=m=%t/m.pcm \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: %clang_cc1 -std=c++20 %t/interface.cppm -emit-reduced-module-interface \ +// RUN: -o %t/m.pcm +// RUN: %clang_cc1 -std=c++20 %t/implementation.cpp -fmodule-file=m=%t/m.pcm \ +// RUN: -fsyntax-only -verify + //--- invisible.h #pragma once // This breaks things. 
const int kInvisibleSymbol = 0; diff --git a/clang/test/Modules/pr58716.cppm b/clang/test/Modules/pr58716.cppm index 3f97fca7d5e8a3..177802fe3afcb8 100644 --- a/clang/test/Modules/pr58716.cppm +++ b/clang/test/Modules/pr58716.cppm @@ -8,7 +8,7 @@ // // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++20 -emit-module-interface %t/m.cppm -o %t/m.pcm // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++20 %t/m.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm -// + //--- m.cppm module; #include "fail.h" diff --git a/clang/test/Modules/pr59719.cppm b/clang/test/Modules/pr59719.cppm index 5aea8992a0ca85..5a600c8e36a4b6 100644 --- a/clang/test/Modules/pr59719.cppm +++ b/clang/test/Modules/pr59719.cppm @@ -7,6 +7,9 @@ // RUN: %clang_cc1 -std=c++20 %t/data.cppm -emit-module-interface -o %t/data.pcm // RUN: %clang_cc1 -std=c++20 %t/main.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/data.cppm -emit-reduced-module-interface -o %t/data.pcm +// RUN: %clang_cc1 -std=c++20 %t/main.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h namespace std { diff --git a/clang/test/Modules/pr59780.cppm b/clang/test/Modules/pr59780.cppm index d4bbd52c13f1a4..ee81ca575d7bf6 100644 --- a/clang/test/Modules/pr59780.cppm +++ b/clang/test/Modules/pr59780.cppm @@ -9,6 +9,16 @@ // RUN: -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/use.cpp // RUN: %clang_cc1 -std=c++20 %t/a.pcm -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/a.cppm +// Test again with reduced BMI. 
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-module-interface \ +// RUN: -o %t/a.full.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-reduced-module-interface \ +// RUN: -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t -S \ +// RUN: -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/use.cpp +// RUN: %clang_cc1 -std=c++20 %t/a.full.pcm -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/a.cppm + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr59999.cppm b/clang/test/Modules/pr59999.cppm index 23710de9fe1c55..54452c26de4710 100644 --- a/clang/test/Modules/pr59999.cppm +++ b/clang/test/Modules/pr59999.cppm @@ -11,6 +11,19 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ // RUN: -fmodule-file=Module=%t/Module.pcm -S -emit-llvm -o - | FileCheck %t/Object.cppm +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ +// RUN: -emit-reduced-module-interface -o %t/Module.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -S -emit-llvm -o - | FileCheck %t/Object.cppm + + //--- Module.cppm export module Module; diff --git a/clang/test/Modules/pr60036.cppm b/clang/test/Modules/pr60036.cppm index 297132cfde60bd..ffbc5fd56c2730 100644 --- a/clang/test/Modules/pr60036.cppm +++ b/clang/test/Modules/pr60036.cppm @@ -24,6 +24,20 @@ // RUN: -fmodule-file=c=%t/c.pcm -fmodule-file=d=%t/d.pcm -fmodule-file=e=%t/e.pcm \ // RUN: -fmodule-file=f=%t/f.pcm -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: 
%clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 %t/e.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/e.pcm +// RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/f.pcm +// RUN: %clang_cc1 -std=c++20 %t/g.cppm -fprebuilt-module-path=%t -verify -fsyntax-only + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr60085.cppm b/clang/test/Modules/pr60085.cppm index fd6fd914a543c3..37d8b09350b42b 100644 --- a/clang/test/Modules/pr60085.cppm +++ b/clang/test/Modules/pr60085.cppm @@ -14,6 +14,23 @@ // RUN: -S -emit-llvm -disable-llvm-passes -o - -fprebuilt-module-path=%t \ // RUN: | FileCheck %t/a.cppm +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/d.cppm \ +// RUN: -emit-reduced-module-interface -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cppm \ +// RUN: -emit-reduced-module-interface -o %t/c.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ +// RUN: -emit-reduced-module-interface -o %t/b.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ +// RUN: -emit-module-interface -o %t/a.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - -fprebuilt-module-path=%t \ +// RUN: | FileCheck %t/a.cppm + //--- d.cppm export module d; diff --git a/clang/test/Modules/pr60275.cppm b/clang/test/Modules/pr60275.cppm index 57b31c6952bea9..eb1ebc0e4330ac 100644 --- a/clang/test/Modules/pr60275.cppm +++ b/clang/test/Modules/pr60275.cppm @@ -5,7 +5,12 @@ // RUN: split-file %s %t // // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -emit-module-interface %t/a.cppm -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp + +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp + //--- foo.h consteval void global() {} diff --git a/clang/test/Modules/pr60486.cppm b/clang/test/Modules/pr60486.cppm index 13802a4917e6e7..1100662c43211e 100644 --- a/clang/test/Modules/pr60486.cppm +++ b/clang/test/Modules/pr60486.cppm @@ -7,6 +7,9 @@ // RUN: %clang_cc1 
-std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 -fmodule-file=a=%t/a.pcm %t/b.cppm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -fmodule-file=a=%t/a.pcm %t/b.cppm -fsyntax-only -verify + //--- foo.h template struct s { diff --git a/clang/test/Modules/pr60693.cppm b/clang/test/Modules/pr60693.cppm index c50791083a5bea..6fb3de60e59b08 100644 --- a/clang/test/Modules/pr60693.cppm +++ b/clang/test/Modules/pr60693.cppm @@ -7,6 +7,10 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm %t/c.cpp -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm %t/c.cpp -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr60775.cppm b/clang/test/Modules/pr60775.cppm index 4db027ba3600a9..35eb92512f4277 100644 --- a/clang/test/Modules/pr60775.cppm +++ b/clang/test/Modules/pr60775.cppm @@ -12,6 +12,19 @@ // RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-module-interface -fmodule-file=c=%t/c.pcm -o %t/f.pcm // RUN: %clang_cc1 -std=c++20 %t/g.cpp -fmodule-file=f=%t/f.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -I%t -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cpp -fmodule-file=a=%t/a.pcm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -I%t -emit-reduced-module-interface -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cppm 
-emit-reduced-module-interface -fmodule-file=c=%t/c.pcm -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 %t/e.cpp -fmodule-file=d=%t/d.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-reduced-module-interface -fmodule-file=c=%t/c.pcm -o %t/f.pcm +// RUN: %clang_cc1 -std=c++20 %t/g.cpp -fmodule-file=f=%t/f.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only + //--- initializer_list.h namespace std { typedef decltype(sizeof(int)) size_t; diff --git a/clang/test/Modules/pr60890.cppm b/clang/test/Modules/pr60890.cppm index 2560bec5b43351..488b512aaac293 100644 --- a/clang/test/Modules/pr60890.cppm +++ b/clang/test/Modules/pr60890.cppm @@ -9,6 +9,12 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cppm -fprebuilt-module-path=%t -o %t/c.pcm // RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -S -emit-llvm -o - +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/b.cppm -fprebuilt-module-path=%t -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/c.cppm -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -S -emit-llvm -o - + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61065.cppm b/clang/test/Modules/pr61065.cppm index cf6fcdda78cd44..c79d7ac4457a11 100644 --- a/clang/test/Modules/pr61065.cppm +++ b/clang/test/Modules/pr61065.cppm @@ -10,6 +10,19 @@ // DISABLED: -fprebuilt-module-path=%t // DISABLED: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// 
DISABLED: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm \ +// DISABLED: -fprebuilt-module-path=%t +// DISABLED: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61065_2.cppm b/clang/test/Modules/pr61065_2.cppm index 10cc1a06b7e450..e898f4086af1de 100644 --- a/clang/test/Modules/pr61065_2.cppm +++ b/clang/test/Modules/pr61065_2.cppm @@ -11,6 +11,21 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/e.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/d.cppm -emit-reduced-module-interface -o %t/d.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/e.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm index baee4b83de5660..b7f9d22e253854 100644 --- a/clang/test/Modules/pr61067.cppm +++ b/clang/test/Modules/pr61067.cppm @@ -12,6 +12,20 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ // RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ +// RUN: -emit-module-interface 
-fmodule-file=a=%t/a.pcm -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ +// RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61317.cppm b/clang/test/Modules/pr61317.cppm index 4b54d26dc5a63b..9ed20e7947062f 100644 --- a/clang/test/Modules/pr61317.cppm +++ b/clang/test/Modules/pr61317.cppm @@ -8,6 +8,15 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h #ifndef _FOO #define _FOO diff --git a/clang/test/Modules/pr61783.cppm b/clang/test/Modules/pr61783.cppm index 9cf773b0b282ba..c3bc853d2dee8e 100644 --- a/clang/test/Modules/pr61783.cppm +++ b/clang/test/Modules/pr61783.cppm @@ -9,6 +9,14 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/user.cpp -fmodule-file=mod=%t/mod.pcm \ // RUN: -S -emit-llvm -o - | FileCheck %t/user.cpp +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/mod.cppm -emit-reduced-module-interface \ +// RUN: -o %t/mod.pcm +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/mod.pcm -S -emit-llvm -o - | \ +// RUN: FileCheck %t/mod.cppm +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/user.cpp -fmodule-file=mod=%t/mod.pcm 
\ +// RUN: -S -emit-llvm -o - | FileCheck %t/user.cpp + //--- mod.cppm module; diff --git a/clang/test/Modules/pr61892.cppm b/clang/test/Modules/pr61892.cppm index 99d02f36b2b54b..7b8905036cd449 100644 --- a/clang/test/Modules/pr61892.cppm +++ b/clang/test/Modules/pr61892.cppm @@ -2,11 +2,25 @@ // RUN: mkdir -p %t // RUN: split-file %s %t // +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: -emit-module-interface %t/a.cppm -o %t/a.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/b.cpp +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/c.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/c.cpp + +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ -// RUN: -emit-module-interface %t/a.cppm -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ -// RUN: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ -// RUN: -emit-llvm -o - | FileCheck %t/b.cpp +// RUN: -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/b.cpp // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ // RUN: %t/c.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ // RUN: -emit-llvm -o - | FileCheck %t/c.cpp @@ -23,20 +37,10 @@ struct integer { export template int a = static_cast(integer()); -struct s { - ~s(); - operator int() const; -}; - -export template -auto d = s(); - int aa() { - return a + d; + return a; } -int dynamic_func(); -export inline int dynamic_var = dynamic_func(); //--- b.cpp import a; @@ -53,13 +57,9 @@ void b() {} //--- c.cpp import a; int c() { - return a + d + 
dynamic_var; + return a; } // The used variables are generated normally // CHECK-DAG: @_ZW1a1aIvE = -// CHECK-DAG: @_ZW1a1dIvE = -// CHECK-DAG: @_ZW1a11dynamic_var = linkonce_odr // CHECK-DAG: @_ZGVW1a1aIvE = -// CHECk-DAG: @_ZGVW1a1dIvE = -// CHECK-DAG: @_ZGVW1a11dynamic_var = linkonce_odr diff --git a/clang/test/Modules/pr62158.cppm b/clang/test/Modules/pr62158.cppm index 7a0761df771580..bb488fff108f28 100644 --- a/clang/test/Modules/pr62158.cppm +++ b/clang/test/Modules/pr62158.cppm @@ -6,6 +6,15 @@ // RUN: %clang_cc1 -std=c++20 %t/main.cpp -fmodule-file=lib=%t/lib.pcm \ // RUN: -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/lib.cppm -o %t/lib.pcm +// RUN: %clang_cc1 -std=c++20 %t/main.cpp -fmodule-file=lib=%t/lib.pcm \ +// RUN: -verify -fsyntax-only + //--- header.h namespace lib::inline __1 { template diff --git a/clang/test/Modules/pr62359.cppm b/clang/test/Modules/pr62359.cppm index 4632457e57f189..69acc3ce303a57 100644 --- a/clang/test/Modules/pr62359.cppm +++ b/clang/test/Modules/pr62359.cppm @@ -12,6 +12,22 @@ // RUN: %clang_cc1 -std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Hello.cppm -o %t/Hello.pcm +// RUN: not %clang_cc1 -std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only \ +// RUN: 2>&1 | FileCheck %t/use.cpp +// RUN: not %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only \ +// RUN: 2>&1 | FileCheck %t/use2.cpp +// +// RUN: %clang_cc1 -std=c++20 -fopenmp -emit-reduced-module-interface %t/Hello.cppm -o %t/Hello.pcm +// RUN: %clang_cc1 
-std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify + + //--- Hello.cppm export module hello; export void hello() { diff --git a/clang/test/Modules/pr62589.cppm b/clang/test/Modules/pr62589.cppm index 4164c3405ac0e3..c5aec3ed81846f 100644 --- a/clang/test/Modules/pr62589.cppm +++ b/clang/test/Modules/pr62589.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++23 -emit-module-interface %t/a.cppm -o %t/a.pcm // RUN: %clang_cc1 -std=c++23 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++23 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++23 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify + //--- foo.h class TypeA {}; diff --git a/clang/test/Modules/pr62705.cppm b/clang/test/Modules/pr62705.cppm index 00769d2277f4f1..9d996ae297d7af 100644 --- a/clang/test/Modules/pr62705.cppm +++ b/clang/test/Modules/pr62705.cppm @@ -10,6 +10,14 @@ // RUN: %clang_cc1 %t/b.pcm -std=c++20 -triple %itanium_abi_triple \ // RUN: -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 %t/a.cppm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 %t/b.cppm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -emit-module-interface -o %t/b.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm +// RUN: %clang_cc1 %t/b.pcm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cppm + //--- foo.h namespace n { diff --git a/clang/test/Modules/pr62796.cppm b/clang/test/Modules/pr62796.cppm index f96e54bc6adede..58b72164e88bfc 100644 --- a/clang/test/Modules/pr62796.cppm +++ b/clang/test/Modules/pr62796.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fmodule-file=Fibonacci.Cache=%t/Cache.pcm \ // RUN: -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 
-emit-reduced-module-interface %t/Cache.cppm -o %t/Cache.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fmodule-file=Fibonacci.Cache=%t/Cache.pcm \ +// RUN: -fsyntax-only -verify + //--- Cache.cppm export module Fibonacci.Cache; diff --git a/clang/test/Modules/pr62943.cppm b/clang/test/Modules/pr62943.cppm index 27868b78220f5c..c3a373814a4398 100644 --- a/clang/test/Modules/pr62943.cppm +++ b/clang/test/Modules/pr62943.cppm @@ -9,6 +9,18 @@ // RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface \ +// RUN: -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t \ +// RUN: -fsyntax-only -verify + //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/pr63544.cppm b/clang/test/Modules/pr63544.cppm index 16224cfd010949..f079abaed09df8 100644 --- a/clang/test/Modules/pr63544.cppm +++ b/clang/test/Modules/pr63544.cppm @@ -8,6 +8,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++23 %t/pr63544.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++23 %t/a.cppm -emit-reduced-module-interface -o %t/m-a.pcm +// RUN: %clang_cc1 -std=c++23 %t/b.cppm -emit-reduced-module-interface -o %t/m-b.pcm +// RUN: %clang_cc1 -std=c++23 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++23 %t/pr63544.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + + //--- foo.h namespace std { diff --git a/clang/test/Modules/pr63595.cppm b/clang/test/Modules/pr63595.cppm index 13a5f84a3e71f2..7c5395e065de54 100644 --- a/clang/test/Modules/pr63595.cppm +++ b/clang/test/Modules/pr63595.cppm @@ -6,6 +6,16 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -I%t %t/module2.cppm -o %t/module2.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/merge.cpp -verify -fsyntax-only +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -I%t %t/module1.cppm -o %t/module1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -I%t %t/module2.cppm -o %t/module2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/merge.cpp -verify -fsyntax-only + + //--- header.h namespace NS { template diff --git a/clang/test/Modules/pr67627.cppm b/clang/test/Modules/pr67627.cppm index 3d4410229080a9..d3f8496c47c2a7 100644 --- a/clang/test/Modules/pr67627.cppm +++ b/clang/test/Modules/pr67627.cppm @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 %t/B.cppm -fmodule-file=A=%t/A.pcm -fsyntax-only -verify +// RUN: rm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fmodule-file=A=%t/A.pcm -fsyntax-only -verify + //--- A.cppm export module A; diff --git a/clang/test/Modules/pr67893.cppm 
b/clang/test/Modules/pr67893.cppm index 00b024ecc2eb11..58990cec01d666 100644 --- a/clang/test/Modules/pr67893.cppm +++ b/clang/test/Modules/pr67893.cppm @@ -9,6 +9,15 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ // RUN: -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm +// Test again with reduced BMI +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \ +// RUN: -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm + //--- a.cppm export module a; export struct A { diff --git a/clang/test/Modules/predefined.cpp b/clang/test/Modules/predefined.cpp index fbe0c4e23ca59c..8f897f5ace938f 100644 --- a/clang/test/Modules/predefined.cpp +++ b/clang/test/Modules/predefined.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -x c++ -std=c++20 -emit-module-interface a.h -o a.pcm -fms-extensions -verify // RUN: %clang_cc1 -std=c++20 a.cpp -fmodule-file=A=a.pcm -fms-extensions -fsyntax-only -verify +// RUN: %clang_cc1 -x c++ -std=c++20 -emit-reduced-module-interface a.h -o a.pcm -fms-extensions -verify +// RUN: %clang_cc1 -std=c++20 a.cpp -fmodule-file=A=a.pcm -fms-extensions -fsyntax-only -verify + //--- a.h // expected-no-diagnostics diff --git a/clang/test/Modules/preferred_name.cppm b/clang/test/Modules/preferred_name.cppm index 46ad96cb1abc33..2f17058678455c 100644 --- a/clang/test/Modules/preferred_name.cppm +++ b/clang/test/Modules/preferred_name.cppm @@ -8,6 +8,16 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify 
-fsyntax-only
+
+// Test again with reduced BMI.
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify -fsyntax-only
 //
 //--- foo.h
 template
diff --git a/clang/test/Modules/redefinition-merges.cppm b/clang/test/Modules/redefinition-merges.cppm
index 9ab4006f985fa9..13032b22ee60e4 100644
--- a/clang/test/Modules/redefinition-merges.cppm
+++ b/clang/test/Modules/redefinition-merges.cppm
@@ -12,6 +12,12 @@
 // RUN: %clang_cc1 -std=c++20 -I%t %t/M.cppm -emit-module-interface -o %t/M.pcm
 // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use1.cpp -verify -fsyntax-only
 // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use2.cpp -verify -fsyntax-only
+
+// Test again with reduced BMI.
+// RUN: %clang_cc1 -std=c++20 -I%t %t/M.cppm -emit-reduced-module-interface -o %t/M.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use1.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use2.cpp -verify -fsyntax-only + // //--- foo.h #ifndef FOO diff --git a/clang/test/Modules/redundant-template-default-arg.cpp b/clang/test/Modules/redundant-template-default-arg.cpp index 6807b45e513954..20a806c4c818ab 100644 --- a/clang/test/Modules/redundant-template-default-arg.cpp +++ b/clang/test/Modules/redundant-template-default-arg.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t -fsyntax-only -verify + //--- foo.h template T u; diff --git a/clang/test/Modules/redundant-template-default-arg2.cpp b/clang/test/Modules/redundant-template-default-arg2.cpp index 41deb112cfa6ea..ae1f0c7e69cc06 100644 --- a/clang/test/Modules/redundant-template-default-arg2.cpp +++ b/clang/test/Modules/redundant-template-default-arg2.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -fsyntax-only -verify + //--- foo.cppm export module foo; export template diff --git a/clang/test/Modules/redundant-template-default-arg3.cpp b/clang/test/Modules/redundant-template-default-arg3.cpp index 8bb222ac91ffce..e4464c40e97687 100644 --- a/clang/test/Modules/redundant-template-default-arg3.cpp +++ 
b/clang/test/Modules/redundant-template-default-arg3.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t/. -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t/. -fsyntax-only -verify + //--- foo.h template T v; diff --git a/clang/test/Modules/search-partitions.cpp b/clang/test/Modules/search-partitions.cpp index 571160def7e9b7..92732958db94e6 100644 --- a/clang/test/Modules/search-partitions.cpp +++ b/clang/test/Modules/search-partitions.cpp @@ -14,6 +14,22 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/moduleA.cpp \ // RUN: -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition1.cpp \ +// RUN: -o %t/A-Part1.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition2.cpp \ +// RUN: -o %t/A-Part2.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition3.cpp \ +// RUN: -o %t/A-Part3.pcm + +// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/moduleA.cpp -fprebuilt-module-path=%t + // expected-no-diagnostics //--- partition1.cpp diff --git a/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm b/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm index e32da39d9df1af..1465c33c3625c8 100644 --- a/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm +++ b/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm @@ -12,6 +12,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/use.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir %t 
+// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-reduced-module-interface -o %t/package-base.pcm +// RUN: %clang_cc1 -std=c++20 %t/child.cppm -emit-reduced-module-interface -o %t/package-child.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/package.cppm -emit-reduced-module-interface -o %t/package.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + //--- base.cppm export module package:base; diff --git a/clang/test/Modules/template-function-specialization.cpp b/clang/test/Modules/template-function-specialization.cpp index 3eac92e7edb94c..1b6bf2de6ba1d9 100644 --- a/clang/test/Modules/template-function-specialization.cpp +++ b/clang/test/Modules/template-function-specialization.cpp @@ -4,7 +4,10 @@ // // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/foo.cppm -o %t/foo.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only -// + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/foo.cppm -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- foo.cppm module; # 3 __FILE__ 1 // use the next physical line number here (and below) diff --git a/clang/test/Modules/template-lambdas.cppm b/clang/test/Modules/template-lambdas.cppm index 69117a1a04fc7b..e82cb1f3ad85ac 100644 --- a/clang/test/Modules/template-lambdas.cppm +++ b/clang/test/Modules/template-lambdas.cppm @@ -12,6 +12,21 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ // RUN: -verify -DUSE_LAMBDA2 +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/template_lambdas.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify +// +// RUN: 
%clang_cc1 -std=c++20 %t/template_lambdas2.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify -DUSE_LAMBDA2 + //--- lambdas.h auto l1 = []() constexpr -> int { return I; diff --git a/clang/test/Modules/template-pack.cppm b/clang/test/Modules/template-pack.cppm index eca17f31f015e5..278c1c2d54ccf5 100644 --- a/clang/test/Modules/template-pack.cppm +++ b/clang/test/Modules/template-pack.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h namespace std diff --git a/clang/test/Modules/template_default_argument.cpp b/clang/test/Modules/template_default_argument.cpp index 5a7d1c04cf1817..202f8dd40d7a94 100644 --- a/clang/test/Modules/template_default_argument.cpp +++ b/clang/test/Modules/template_default_argument.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- templ.h template diff --git a/clang/unittests/Sema/SemaNoloadLookupTest.cpp b/clang/unittests/Sema/SemaNoloadLookupTest.cpp index b24c72cba407f3..cf89c7331e4e0f 100644 --- a/clang/unittests/Sema/SemaNoloadLookupTest.cpp +++ b/clang/unittests/Sema/SemaNoloadLookupTest.cpp @@ -64,7 +64,7 @@ class NoloadLookupTest : public ::testing::Test { CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); std::string CacheBMIPath = - llvm::Twine(TestDir + "/" + ModuleName + " 
.pcm").str(); + llvm::Twine(TestDir + "/" + ModuleName + ".pcm").str(); std::string PrebuiltModulePath = "-fprebuilt-module-path=" + TestDir.str().str(); const char *Args[] = {"clang++", @@ -75,9 +75,7 @@ class NoloadLookupTest : public ::testing::Test { TestDir.c_str(), "-I", TestDir.c_str(), - FileName.c_str(), - "-o", - CacheBMIPath.c_str()}; + FileName.c_str()}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); EXPECT_TRUE(Invocation); @@ -85,7 +83,8 @@ class NoloadLookupTest : public ::testing::Test { CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + GenerateReducedModuleInterfaceAction Action; EXPECT_TRUE(Instance.ExecuteAction(Action)); EXPECT_FALSE(Diags->hasErrorOccurred()); diff --git a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp index ed0daa43436eb6..ad8892b8c8be1e 100644 --- a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp +++ b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp @@ -69,9 +69,9 @@ export int aa = 43; CIOpts.Diags = Diags; CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - TestDir.c_str(), "a.cppm", "-o", BMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), "a.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); EXPECT_TRUE(Invocation); @@ -88,6 +88,8 @@ export int aa = 43; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); + Instance.getFrontendOpts().OutputFile = BMIPath; + if (auto VFSWithRemapping = createVFSFromCompilerInvocation( Instance.getInvocation(), Instance.getDiagnostics(), CIOpts.VFS)) CIOpts.VFS = VFSWithRemapping; @@ -95,7 +97,7 @@ export int aa = 43; 
Instance.getHeaderSearchOpts().ValidateASTInputFilesContent = true; - GenerateModuleInterfaceAction Action; + GenerateReducedModuleInterfaceAction Action; EXPECT_TRUE(Instance.ExecuteAction(Action)); EXPECT_FALSE(Diags->hasErrorOccurred()); } diff --git a/clang/unittests/Serialization/NoCommentsTest.cpp b/clang/unittests/Serialization/NoCommentsTest.cpp index 2632a6337807ac..a0a564aeff9a15 100644 --- a/clang/unittests/Serialization/NoCommentsTest.cpp +++ b/clang/unittests/Serialization/NoCommentsTest.cpp @@ -90,9 +90,9 @@ void foo() {} CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); std::string CacheBMIPath = llvm::Twine(TestDir + "/Comments.pcm").str(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - TestDir.c_str(), "Comments.cppm", "-o", CacheBMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), "Comments.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); ASSERT_TRUE(Invocation); @@ -100,7 +100,8 @@ void foo() {} CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + GenerateReducedModuleInterfaceAction Action; ASSERT_TRUE(Instance.ExecuteAction(Action)); ASSERT_FALSE(Diags->hasErrorOccurred()); diff --git a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp index 7efa1c1d64a964..5cbbfb9ff003b3 100644 --- a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp +++ b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp @@ -96,10 +96,9 @@ export namespace Fibonacci CIOpts.Diags = Diags; CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); - std::string CacheBMIPath = llvm::Twine(TestDir + "/Cached.pcm").str(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - 
TestDir.c_str(), "Cached.cppm", "-o", CacheBMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), "Cached.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); ASSERT_TRUE(Invocation); @@ -108,7 +107,11 @@ export namespace Fibonacci CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + + std::string CacheBMIPath = llvm::Twine(TestDir + "/Cached.pcm").str(); + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + + GenerateReducedModuleInterfaceAction Action; ASSERT_TRUE(Instance.ExecuteAction(Action)); ASSERT_FALSE(Diags->hasErrorOccurred()); From a4703e5bccc63eca45e0e4bff8f04f82d8622ebe Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Fri, 8 Mar 2024 10:19:07 +0800 Subject: [PATCH 144/158] [clang] Remove std::move in GenerateModuleAction::CreateMultiplexConsumer (NFC) llvm-project/clang/lib/Frontend/FrontendActions.cpp:213:10: error: moving a local object in a return statement prevents copy elision [-Werror,-Wpessimizing-move] 213 | return std::move(Consumers); | ^ /Users/jiefu/llvm-project/clang/lib/Frontend/FrontendActions.cpp:213:10: note: remove std::move call here 213 | return std::move(Consumers); | ^~~~~~~~~~ ~ 1 error generated. 
--- clang/lib/Frontend/FrontendActions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index cd9b9923421c69..50338bfa670f83 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -210,7 +210,7 @@ GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI, +CI.getFrontendOpts().BuildingImplicitModule)); Consumers.push_back(CI.getPCHContainerWriter().CreatePCHContainerGenerator( CI, std::string(InFile), OutputFile, std::move(OS), Buffer)); - return std::move(Consumers); + return Consumers; } std::unique_ptr From 448419007e1bb8a9e2edfe2c1c87fef6d104442a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Nov 2023 05:34:05 +0100 Subject: [PATCH 145/158] update_test_checks: precommit a test case The test case demonstrates how meta variables are needlessly renamed, making diffs harder to read. --- .../Inputs/stable_ir_values.ll | 22 ++++++++++ .../Inputs/stable_ir_values.ll.expected | 23 ++++++++++ .../Inputs/stable_ir_values2.ll | 30 +++++++++++++ .../Inputs/stable_ir_values2.ll.expected | 26 ++++++++++++ .../Inputs/stable_ir_values3.ll | 38 +++++++++++++++++ .../Inputs/stable_ir_values3.ll.expected | 38 +++++++++++++++++ .../Inputs/stable_ir_values4.ll | 41 ++++++++++++++++++ .../Inputs/stable_ir_values4.ll.expected | 42 +++++++++++++++++++ .../update_test_checks/stable_ir_values.test | 2 + .../update_test_checks/stable_ir_values2.test | 2 + .../update_test_checks/stable_ir_values3.test | 2 + .../update_test_checks/stable_ir_values4.test | 2 + 12 files changed, 268 insertions(+) create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll create mode 
100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll new file mode 100644 index 00000000000000..8457bf7dc40a2e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected new file mode 100644 index 00000000000000..5142e3ed32ba45 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll new file mode 100644 index 00000000000000..d05c26241f87c1 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + ret i32 %6 +} + +declare i32 @foo(i1) diff --git 
a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected new file mode 100644 index 00000000000000..53f60bda8ee591 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + ret i32 %6 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll new file mode 100644 index 00000000000000..3b449291d0e7f8 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; Test that we don't regress diff quality by trying to keep variable names +; stable (and messing up the matching). 
+ +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + %7 = icmp eq i32 %x, 1 + %8 = call i32 @foo(i1 %7) + %9 = icmp ne i32 %8, 0 + %10 = select i1 %9, i32 %8, i32 %6 + + ret i32 %10 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected new file mode 100644 index 00000000000000..3d0f772505a659 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; Test that we don't regress diff quality by trying to keep variable names +; stable (and messing up the matching). 
+ +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + %7 = icmp eq i32 %x, 1 + %8 = call i32 @foo(i1 %7) + %9 = icmp ne i32 %8, 0 + %10 = select i1 %9, i32 %8, i32 %6 + + ret i32 %10 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll new file mode 100644 index 00000000000000..e3d8452f963101 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; A test that hits the quadratic runtime prevention in the diff algorithm and +; a more complex case of name conflict avoidance. 
+ +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @foo(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @foo(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i32 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) +; CHECK-NEXT: ret i32 [[TMP12]] +; + %1 = mul i32 %x, 3 + %2 = call i32 @foo(i32 %1) + %3 = call i32 @foo(i32 %2) + %4 = call i32 @foo(i32 %3) + %5 = call i32 @foo(i32 %4) + %6 = call i32 @foo(i32 %5) + %7 = call i32 @foo(i32 %6) + %8 = xor i32 %7, 1 + %9 = call i32 @foo(i32 %8) + %10 = add i32 %9, 1 + %11 = call i32 @foo(i32 %10) + %12 = call i32 @foo(i32 %11) + %13 = call i32 @foo(i32 %12) + + ret i32 %13 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected new file mode 100644 index 00000000000000..5962bdafd9ea0a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; A test that hits the quadratic runtime prevention in the diff algorithm and +; a more complex case of name conflict avoidance. 
+ +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @foo(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @foo(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP12]]) +; CHECK-NEXT: ret i32 [[TMP13]] +; + %1 = mul i32 %x, 3 + %2 = call i32 @foo(i32 %1) + %3 = call i32 @foo(i32 %2) + %4 = call i32 @foo(i32 %3) + %5 = call i32 @foo(i32 %4) + %6 = call i32 @foo(i32 %5) + %7 = call i32 @foo(i32 %6) + %8 = xor i32 %7, 1 + %9 = call i32 @foo(i32 %8) + %10 = add i32 %9, 1 + %11 = call i32 @foo(i32 %10) + %12 = call i32 @foo(i32 %11) + %13 = call i32 @foo(i32 %12) + + ret i32 %13 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test new file mode 100644 index 00000000000000..c6287a6b29ca92 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test new file mode 100644 index 00000000000000..3cebcd52f00521 --- /dev/null +++ 
b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values2.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values2.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test new file mode 100644 index 00000000000000..83bc80128541f3 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values3.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values3.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test new file mode 100644 index 00000000000000..89f252f8078064 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values4.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values4.ll.expected From fc1e5f978561d6357083f9b5bc981e52d93374b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 23 Dec 2023 14:07:47 +0100 Subject: [PATCH 146/158] update_test_checks: simplify is_local_def_ir_value The match argument is unused. --- llvm/utils/UpdateTestChecks/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 53777523ec2a58..72d21cf4ca7dbc 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -933,7 +933,7 @@ def __init__( self.variable_mapping = {} # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'. 
- def is_local_def_ir_value_match(self, match): + def is_local_def_ir_value(self): return self.ir_prefix == "%" # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'. @@ -949,7 +949,7 @@ def get_ir_prefix_from_ir_value_match(self, match): # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals def get_ir_regex_from_ir_value_re_match(self, match): # for backwards compatibility we check locals with '.*' - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return ".*" return self.ir_regexp @@ -990,7 +990,7 @@ def get_value_definition(self, var, match): else: regex = self.get_ir_regex_from_ir_value_re_match(match) capture_start = "[[" - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return capture_start + varname + ":" + prefix + regex + "]]" return prefix + capture_start + varname + ":" + regex + "]]" @@ -999,7 +999,7 @@ def get_value_use(self, var, match, var_prefix=None): if var_prefix is None: var_prefix = self.check_prefix capture_start = "[[#" if self.is_number else "[[" - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return capture_start + self.get_value_name(var, var_prefix) + "]]" prefix = self.get_ir_prefix_from_ir_value_match(match)[0] return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]" @@ -1209,7 +1209,7 @@ def transform_line_vars(match): " with scripted FileCheck name." 
% (var,) ) key = (var, nameless_value.check_key) - is_local_def = nameless_value.is_local_def_ir_value_match(match) + is_local_def = nameless_value.is_local_def_ir_value() if is_local_def and key in vars_seen: rv = nameless_value.get_value_use(var, match) elif not is_local_def and key in global_vars_seen: From 5747f9d3e9f3a49c27984b9341e123b9ca82e741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 23 Dec 2023 14:20:43 +0100 Subject: [PATCH 147/158] update_test_checks: simplify get_ir_regex The match argument isn't used. --- llvm/utils/UpdateTestChecks/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 72d21cf4ca7dbc..a3365fef5f6e7d 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -947,7 +947,7 @@ def get_ir_prefix_from_ir_value_match(self, match): return re.search(self.ir_prefix, match[0])[0], self.check_prefix # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? 
for locals - def get_ir_regex_from_ir_value_re_match(self, match): + def get_ir_regex(self): # for backwards compatibility we check locals with '.*' if self.is_local_def_ir_value(): return ".*" @@ -988,7 +988,7 @@ def get_value_definition(self, var, match): regex = "" # always capture a number in the default format capture_start = "[[#" else: - regex = self.get_ir_regex_from_ir_value_re_match(match) + regex = self.get_ir_regex() capture_start = "[[" if self.is_local_def_ir_value(): return capture_start + varname + ":" + prefix + regex + "]]" From fb02f9ac84a6151e41aba8f7391edd132a9aaf14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Nov 2023 06:46:07 +0100 Subject: [PATCH 148/158] update_test_checks: keep meta variables stable by default Prior to this change, running UTC on larger tests, especially tests with unnamed IR values, often resulted in a spuriously large diff because e.g. TMPnn variables in the CHECK lines were renumbered. This change attempts to reduce the diff by keeping those variable names the same. There are cases in which this "drift" of variable names can end up being more confusing. The old behavior can be re-enabled with the --reset-variable-names command line argument. The improvement may not be immediately apparent in the diff of this change. The point is that the diff of stable_ir_values.ll against stable_ir_values.ll.expected after this change is smaller. Ideally, we'd also keep meta variables for "global" objects stable, e.g. for attributes (#nn) and metadata (!nn). However, that would require a much more substantial refactoring of how we generate check lines, so I left it for future work. 
--- .../Inputs/stable_ir_values.ll.expected | 10 +- .../Inputs/stable_ir_values.ll.expected.reset | 23 + .../Inputs/stable_ir_values2.ll.expected | 12 +- .../Inputs/stable_ir_values3.ll | 3 - .../Inputs/stable_ir_values3.ll.expected | 3 - .../Inputs/stable_ir_values4.ll.expected | 8 +- .../update_test_checks/stable_ir_values.test | 3 + llvm/utils/UpdateTestChecks/common.py | 513 +++++++++++++++++- llvm/utils/update_test_checks.py | 22 +- 9 files changed, 554 insertions(+), 43 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected index 5142e3ed32ba45..3549a4d76aa762 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected @@ -9,11 +9,11 @@ define i32 @func({i32, i32} %x, i32 %y) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] ; %x.i34 = extractvalue {i32, i32} %x, 0 %1 = add i32 %y, 1 diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset new file mode 100644 index 
00000000000000..5142e3ed32ba45 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. + +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected index 53f60bda8ee591..6311a55a1f9de1 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected @@ -5,12 +5,12 @@ define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 
[[TMP2]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] ; %1 = icmp eq i32 %x, 0 %2 = call i32 @foo(i1 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll index 3b449291d0e7f8..a4f4fc67f78d3f 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s -; Test that we don't regress diff quality by trying to keep variable names -; stable (and messing up the matching). - define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected index 3d0f772505a659..08d3c22172ee3f 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s -; Test that we don't regress diff quality by trying to keep variable names -; stable (and messing up the matching). 
- define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected index 5962bdafd9ea0a..e3fa51598c48e3 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected @@ -15,12 +15,12 @@ define i32 @func(i32 %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP12]]) -; CHECK-NEXT: ret i32 [[TMP13]] +; CHECK-NEXT: ret i32 [[TMP12]] ; %1 = mul i32 %x, 3 %2 = call i32 @foo(i32 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test index c6287a6b29ca92..4dfaf5d25c8a69 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test @@ -1,2 +1,5 @@ # RUN: cp -f %S/Inputs/stable_ir_values.ll %t.ll && %update_test_checks %t.ll # RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected +# Now test that we can reset all the names +# RUN: %update_test_checks %t.ll --reset-variable-names +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected.reset diff --git 
a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index a3365fef5f6e7d..f766d541c79c02 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -1,6 +1,8 @@ from __future__ import print_function import argparse +import bisect +import collections import copy import glob import itertools @@ -10,7 +12,7 @@ import sys import shlex -from typing import List +from typing import List, Mapping, Set ##### Common utilities for update_*test_checks.py @@ -420,6 +422,48 @@ def should_add_line_to_output( return True +def collect_original_check_lines(ti: TestInfo, prefix_set: set): + """ + Collect pre-existing check lines into a dictionary `result` which is + returned. + + result[func_name][prefix] is filled with a list of right-hand-sides of check + lines. + """ + result = {} + + current_function = None + for input_line_info in ti.ro_iterlines(): + input_line = input_line_info.line + if current_function is not None: + if input_line == "": + continue + if input_line.lstrip().startswith(";"): + m = CHECK_RE.match(input_line) + if ( + m is not None + and m.group(1) in prefix_set + and m.group(2) not in ["LABEL", "SAME"] + ): + if m.group(1) not in current_function: + current_function[m.group(1)] = [] + current_function[m.group(1)].append(input_line[m.end() :].strip()) + continue + current_function = None + + m = IR_FUNCTION_RE.match(input_line) + if m is not None: + func_name = m.group(1) + if ti.args.function is not None and func_name != ti.args.function: + # When filtering on a specific function, skip all others. 
+ continue + + assert func_name not in result + current_function = result[func_name] = {} + + return result + + # Perform lit-like substitutions def getSubstitutions(sourcepath): sourcedir = os.path.dirname(sourcepath) @@ -491,7 +535,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") CHECK_RE = re.compile( - r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:" + r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" ) CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") @@ -1187,20 +1231,325 @@ def may_clash_with_default_check_prefix_name(check_prefix, var): ) +def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: + """ + Find a large ordered matching between strings in lhs and rhs. + + Think of this as finding the *unchanged* lines in a diff, where the entries + of lhs and rhs are lines of the files being diffed. + + Returns a list of matched (lhs_idx, rhs_idx) pairs. + """ + + if not lhs or not rhs: + return [] + + # Collect matches in reverse order. + matches = [] + + # First, collect a set of candidate matching edges. We limit this to a + # constant multiple of the input size to avoid quadratic runtime. 
+ patterns = collections.defaultdict(lambda: ([], [])) + + for idx in range(len(lhs)): + patterns[lhs[idx]][0].append(idx) + for idx in range(len(rhs)): + patterns[rhs[idx]][1].append(idx) + + multiple_patterns = [] + + candidates = [] + for pattern in patterns.values(): + if not pattern[0] or not pattern[1]: + continue + + if len(pattern[0]) == len(pattern[1]) == 1: + candidates.append((pattern[0][0], pattern[1][0])) + else: + multiple_patterns.append(pattern) + + multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) + + for pattern in multiple_patterns: + if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( + len(lhs) + len(rhs) + ): + break + for lhs_idx in pattern[0]: + for rhs_idx in pattern[1]: + candidates.append((lhs_idx, rhs_idx)) + + if not candidates: + # The LHS and RHS either share nothing in common, or lines are just too + # identical. In that case, let's give up and not match anything. + return [] + + # Compute a maximal crossing-free matching via an algorithm that is + # inspired by a mixture of dynamic programming and line-sweeping in + # discrete geometry. + # + # I would be surprised if this algorithm didn't exist somewhere in the + # literature, but I found it without consciously recalling any + # references, so you'll have to make do with the explanation below. + # Sorry. + # + # The underlying graph is bipartite: + # - nodes on the LHS represent lines in the original check + # - nodes on the RHS represent lines in the new (updated) check + # + # Nodes are implicitly sorted by the corresponding line number. + # Edges (unique_matches) are sorted by the line number on the LHS. + # + # Here's the geometric intuition for the algorithm. + # + # * Plot the edges as points in the plane, with the original line + # number on the X axis and the updated line number on the Y axis. + # * The goal is to find a longest "chain" of points where each point + # is strictly above and to the right of the previous point. 
+ # * The algorithm proceeds by sweeping a vertical line from left to + # right. + # * The algorithm maintains a table where `table[N]` answers the + # question "What is currently the 'best' way to build a chain of N+1 + # points to the left of the vertical line". Here, 'best' means + # that the last point of the chain is as low as possible (minimal + # Y coordinate). + # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of + # the last point in the chain and `y` is its Y coordinate + # * A key invariant is that the Y values in the table are + # monotonically increasing + # * Thanks to these properties, the table can be used to answer the + # question "What is the longest chain that can be built to the left + # of the vertical line using only points below a certain Y value", + # using a binary search over the table. + # * The algorithm also builds a backlink structure in which every point + # links back to the previous point on a best (longest) chain ending + # at that point + # + # The core loop of the algorithm sweeps the line and updates the table + # and backlink structure for every point that we cross during the sweep. + # Therefore, the algorithm is trivially O(M log M) in the number of + # points. + candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) + + backlinks = [] + table = [] + for _, rhs_idx in candidates: + candidate_idx = len(backlinks) + ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0]) + + # Update the table to record a best chain ending in the current point. + # There always is one, and if any of the previously visited points had + # a higher Y coordinate, then there is always a previously recorded best + # chain that can be improved upon by using the current point. + # + # There is only one case where there is some ambiguity.
If the + # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as + # the current point (this can only happen if the same line appeared + # multiple times on the LHS), then we could choose to keep the + # previously recorded best chain instead. That would bias the algorithm + # differently but should have no systematic impact on the quality of the + # result. + if ti < len(table): + table[ti] = (rhs_idx, candidate_idx) + else: + table.append((rhs_idx, candidate_idx)) + if ti > 0: + backlinks.append(table[ti - 1][1]) + else: + backlinks.append(None) + + # Commit to names in the matching by walking the backlinks. Recursively + # attempt to fill in more matches in-between. + match_idx = table[-1][1] + while match_idx is not None: + current = candidates[match_idx] + matches.append(current) + match_idx = backlinks[match_idx] + + matches.reverse() + return matches + + +VARIABLE_TAG = "[[@@]]" +METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") +NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") + + +class CheckValueInfo: + def __init__( + self, + nameless_value: NamelessValue, + var: str, + prefix: str, + ): + self.nameless_value = nameless_value + self.var = var + self.prefix = prefix + + +# Represent a check line in a way that allows us to compare check lines while +# ignoring some or all of the FileCheck variable names.
+class CheckLineInfo: + def __init__(self, line, values): + # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG + self.line: str = line + + # Information on each FileCheck variable name occurrence in the line + self.values: List[CheckValueInfo] = values + + def __repr__(self): + return f"CheckLineInfo(line={self.line}, self.values={self.values})" + + +def remap_metavar_names( + old_line_infos: List[CheckLineInfo], + new_line_infos: List[CheckLineInfo], + committed_names: Set[str], +) -> Mapping[str, str]: + """ + Map all FileCheck variable names that appear in new_line_infos to new + FileCheck variable names in an attempt to reduce the diff from old_line_infos + to new_line_infos. + + This is done by: + * Matching old check lines and new check lines using a diffing algorithm + applied after replacing names with wildcards. + * Committing to variable names such that the matched lines become equal + (without wildcards) if possible + * This is done recursively to handle cases where many lines are equal + after wildcard replacement + """ + # Initialize uncommitted identity mappings + new_mapping = {} + for line in new_line_infos: + for value in line.values: + new_mapping[value.var] = value.var + + # Recursively commit to the identity mapping or find a better one + def recurse(old_begin, old_end, new_begin, new_end): + if old_begin == old_end or new_begin == new_end: + return + + # Find a matching of lines where uncommitted names are replaced + # with a placeholder.
+ def diffify_line(line, mapper): + values = [] + for value in line.values: + mapped = mapper(value.var) + values.append(mapped if mapped in committed_names else "?") + return line.line.strip() + " @@@ " + " @ ".join(values) + + lhs_lines = [ + diffify_line(line, lambda x: x) + for line in old_line_infos[old_begin:old_end] + ] + rhs_lines = [ + diffify_line(line, lambda x: new_mapping[x]) + for line in new_line_infos[new_begin:new_end] + ] + + candidate_matches = find_diff_matching(lhs_lines, rhs_lines) + + # Apply commits greedily on a match-by-match basis + matches = [(-1, -1)] + committed_anything = False + for lhs_idx, rhs_idx in candidate_matches: + lhs_line = old_line_infos[lhs_idx] + rhs_line = new_line_infos[rhs_idx] + + local_commits = {} + + for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): + if new_mapping[rhs_value.var] in committed_names: + # The new value has already been committed. If it was mapped + # to the same name as the original value, we can consider + # committing other values from this line. Otherwise, we + # should ignore this line. + if new_mapping[rhs_value.var] == lhs_value.var: + continue + else: + break + + if rhs_value.var in local_commits: + # Same, but for a possible commit happening on the same line + if local_commits[rhs_value.var] == lhs_value.var: + continue + else: + break + + if lhs_value.var in committed_names: + # We can't map this value because the name we would map it to has already been + # committed for something else. Give up on this line. 
+ break + + local_commits[rhs_value.var] = lhs_value.var + else: + # No reason not to add any commitments for this line + for rhs_var, lhs_var in local_commits.items(): + new_mapping[rhs_var] = lhs_var + committed_names.add(lhs_var) + committed_anything = True + + if ( + lhs_var != rhs_var + and lhs_var in new_mapping + and new_mapping[lhs_var] == lhs_var + ): + new_mapping[lhs_var] = "conflict_" + lhs_var + + matches.append((lhs_idx, rhs_idx)) + + matches.append((old_end, new_end)) + + # Recursively handle sequences between matches + if committed_anything: + for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): + recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) + + recurse(0, len(old_line_infos), 0, len(new_line_infos)) + + # Commit to remaining names and resolve conflicts + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + if not mapped_name.startswith("conflict_"): + assert mapped_name == new_name + committed_names.add(mapped_name) + + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + assert mapped_name.startswith("conflict_") + + m = NUMERIC_SUFFIX_RE.search(new_name) + base_name = new_name[: m.start()] + suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 + while True: + candidate = f"{base_name}{suffix}" + if candidate not in committed_names: + new_mapping[new_name] = candidate + committed_names.add(candidate) + break + suffix += 1 + + return new_mapping + + def generalize_check_lines_common( lines, is_analyze, vars_seen, global_vars_seen, nameless_values, - nameless_value_regex, + nameless_value_regex: re.Pattern, is_asm, preserve_names, + original_check_lines=None, ): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. 
- def transform_line_vars(match): + def transform_line_vars(match, transform_locals=True): var = get_name_from_ir_value_match(match) nameless_value = get_nameless_value_from_match(match, nameless_values) if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var): @@ -1210,6 +1559,8 @@ def transform_line_vars(match): ) key = (var, nameless_value.check_key) is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def and not transform_locals: + return None if is_local_def and key in vars_seen: rv = nameless_value.get_value_use(var, match) elif not is_local_def and key in global_vars_seen: @@ -1228,13 +1579,15 @@ def transform_line_vars(match): # including the commas and spaces. return match.group(1) + rv + match.group(match.lastindex) - lines_with_def = [] + def transform_non_local_line_vars(match): + return transform_line_vars(match, False) + multiple_braces_re = re.compile(r"({{+)|(}}+)") def escape_braces(match_obj): return '{{' + re.escape(match_obj.group(0)) + '}}' - for i, line in enumerate(lines): - if not is_asm and not is_analyze: + if not is_asm and not is_analyze: + for i, line in enumerate(lines): # An IR variable named '%.' matches the FileCheck regex string. line = line.replace("%.", "%dot") for regex in _global_hex_value_regex: @@ -1252,25 +1605,136 @@ def escape_braces(match_obj): # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) lines[i] = scrubbed_line - if not preserve_names: - # It can happen that two matches are back-to-back and for some reason sub - # will not replace both of them. For now we work around this by - # substituting until there is no more match. 
- changed = True - while changed: - (lines[i], changed) = nameless_value_regex.subn( - transform_line_vars, lines[i], count=1 - ) - if is_analyze: + + if not preserve_names: + if is_asm: + for i, _ in enumerate(lines): + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = nameless_value_regex.subn( + transform_line_vars, lines[i], count=1 + ) + else: + # LLVM IR case. Start by handling global meta variables (global IR variables, + # metadata, attributes) + for i, _ in enumerate(lines): + start = 0 + while True: + m = nameless_value_regex.search(lines[i][start:]) + if m is None: + break + start += m.start() + sub = transform_non_local_line_vars(m) + if sub is not None: + lines[i] = ( + lines[i][:start] + sub + lines[i][start + len(m.group(0)) :] + ) + start += 1 + + # Collect information about new check lines and original check lines (if any) + new_line_infos = [] + for line in lines: + filtered_line = "" + values = [] + while True: + m = nameless_value_regex.search(line) + if m is None: + filtered_line += line + break + + var = get_name_from_ir_value_match(m) + nameless_value = get_nameless_value_from_match(m, nameless_values) + var = nameless_value.get_value_name( + var, nameless_value.check_prefix + ) + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
+ filtered_line += ( + line[: m.start()] + + m.group(1) + + VARIABLE_TAG + + m.group(m.lastindex) + ) + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=nameless_value, + var=var, + prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[ + 0 + ], + ) + ) + new_line_infos.append(CheckLineInfo(filtered_line, values)) + + orig_line_infos = [] + for line in original_check_lines or []: + filtered_line = "" + values = [] + while True: + m = METAVAR_RE.search(line) + if m is None: + filtered_line += line + break + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. + filtered_line += line[: m.start()] + VARIABLE_TAG + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=None, + var=m.group(1), + prefix=None, + ) + ) + orig_line_infos.append(CheckLineInfo(filtered_line, values)) + + # Compute the variable name mapping + committed_names = set(vars_seen) + + mapping = remap_metavar_names( + orig_line_infos, new_line_infos, committed_names + ) + + for i, line_info in enumerate(new_line_infos): + line_template = line_info.line + line = "" + + for value in line_info.values: + idx = line_template.find(VARIABLE_TAG) + line += line_template[:idx] + line_template = line_template[idx + len(VARIABLE_TAG) :] + + key = (mapping[value.var], nameless_value.check_key) + is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def: + if mapping[value.var] in vars_seen: + line += f"[[{mapping[value.var]}]]" + else: + line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]" + vars_seen.add(mapping[value.var]) + else: + raise RuntimeError("not implemented") + + line += line_template + + lines[i] = line + + if is_analyze: + for i, _ in enumerate(lines): # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) lines[i] = scrubbed_line + return lines # Replace IR value defs and uses with FileCheck variables. 
def generalize_check_lines( - lines, is_analyze, vars_seen, global_vars_seen, preserve_names + lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines ): return generalize_check_lines_common( lines, @@ -1281,6 +1745,7 @@ def generalize_check_lines( IR_VALUE_RE, False, preserve_names, + original_check_lines=original_check_lines, ) @@ -1337,6 +1802,7 @@ def add_checks( global_vars_seen_dict, is_filtered, preserve_names=False, + original_check_lines: Mapping[str, List[str]] = {}, ): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() @@ -1409,6 +1875,7 @@ def add_checks( vars_seen, global_vars_seen, preserve_names, + original_check_lines=[], )[0] func_name_separator = func_dict[checkprefix][func_name].func_name_separator if "[[" in args_and_sig: @@ -1516,7 +1983,12 @@ def add_checks( # to variable naming fashions. else: func_body = generalize_check_lines( - func_body, False, vars_seen, global_vars_seen, preserve_names + func_body, + False, + vars_seen, + global_vars_seen, + preserve_names, + original_check_lines=original_check_lines.get(checkprefix), ) # This could be selectively enabled with an optional invocation argument. @@ -1578,6 +2050,7 @@ def add_ir_checks( version, global_vars_seen_dict, is_filtered, + original_check_lines={}, ): # Label format is based on IR string. 
if function_sig and version > 1: @@ -1602,6 +2075,7 @@ def add_ir_checks( global_vars_seen_dict, is_filtered, preserve_names, + original_check_lines=original_check_lines, ) @@ -1890,6 +2364,7 @@ def get_autogennote_suffix(parser, args): "llvm_bin", "verbose", "force_update", + "reset_variable_names", ): continue value = getattr(args, action.dest) diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index b5077d79351378..04808ce6bb1c6f 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -85,6 +85,12 @@ def main(): choices=["none", "smart", "all"], help="Check global entries (global variables, metadata, attribute sets, ...) for functions", ) + parser.add_argument( + "--reset-variable-names", + action="store_true", + help="Reset all variable names to correspond closely to the variable names in IR. " + "This tends to result in larger diffs.", + ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) @@ -170,13 +176,19 @@ def main(): ) builder.processed_prefixes(prefixes) + prefix_set = set( + [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] + ) + + if not ti.args.reset_variable_names: + original_check_lines = common.collect_original_check_lines(ti, prefix_set) + else: + original_check_lines = {} + func_dict = builder.finish_and_get_func_dict() is_in_function = False is_in_function_start = False has_checked_pre_function_globals = False - prefix_set = set( - [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] - ) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -230,6 +242,7 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get(func, {}), ), ) ) @@ -261,6 +274,9 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get( + func_name, {} + ), ) ) 
is_in_function_start = False From 51b7ef937504ce45ef6a024aadfdee4147bffab5 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Thu, 7 Mar 2024 22:04:59 -0500 Subject: [PATCH 149/158] [libc][NFC] Fix a typo in test/src/stdfix/RoundTest.h. (#84411) --- libc/test/src/stdfix/RoundTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/test/src/stdfix/RoundTest.h b/libc/test/src/stdfix/RoundTest.h index 06343addbef20e..d3ae04db9749ba 100644 --- a/libc/test/src/stdfix/RoundTest.h +++ b/libc/test/src/stdfix/RoundTest.h @@ -28,7 +28,7 @@ template class RoundTest : public LIBC_NAMESPACE::testing::Test { void testSpecialNumbers(RoundFunc func) { EXPECT_EQ(zero, func(zero, FXRep::FRACTION_LEN - 5)); - EXPECT_EQ(max, func(min, 0)); + EXPECT_EQ(min, func(min, 0)); EXPECT_EQ(max, func(max, FXRep::FRACTION_LEN)); EXPECT_EQ(one, func(half, 0)); From 66bd3cd75b32ccfa8d228c200cf4fbf72d49fd1f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 7 Mar 2024 19:09:18 -0800 Subject: [PATCH 150/158] [AMDGPU,test] Change llc -march= to -mtriple= PR #75982 had been created before these tests were added, therefore some test were not updated. 
--- .../AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir | 2 +- .../wmma-gfx12-w32-f16-f32-matrix-modifiers.ll | 2 +- .../AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll | 2 +- .../GlobalISel/wmma-gfx12-w32-iu-modifiers.ll | 2 +- .../wmma-gfx12-w32-swmmac-index_key.ll | 2 +- .../CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll | 2 +- .../wmma-gfx12-w64-f16-f32-matrix-modifiers.ll | 2 +- .../AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll | 2 +- .../GlobalISel/wmma-gfx12-w64-iu-modifiers.ll | 2 +- .../wmma-gfx12-w64-swmmac-index_key.ll | 2 +- .../CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll | 2 +- llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll | 4 ++-- .../CodeGen/AMDGPU/generic-targets-require-v6.ll | 16 ++++++++-------- .../CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll | 4 ++-- .../test/CodeGen/AMDGPU/insert-waitcnts-hang.mir | 2 +- .../AMDGPU/llvm.amdgcn.atomic.cond.sub.ll | 2 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll | 4 ++-- .../AMDGPU/llvm.amdgcn.global.load.tr-w32.ll | 4 ++-- .../AMDGPU/llvm.amdgcn.global.load.tr-w64.ll | 4 ++-- .../CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll | 4 ++-- .../CodeGen/AMDGPU/spill-regpressure-less.mir | 2 +- .../AMDGPU/wait-before-stores-with-scope_sys.ll | 4 ++-- .../wmma-gfx12-w32-f16-f32-matrix-modifiers.ll | 2 +- llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll | 2 +- .../AMDGPU/wmma-gfx12-w32-iu-modifiers.ll | 2 +- .../AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll | 2 +- llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll | 2 +- .../wmma-gfx12-w64-f16-f32-matrix-modifiers.ll | 2 +- llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll | 2 +- .../AMDGPU/wmma-gfx12-w64-iu-modifiers.ll | 2 +- .../AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll | 2 +- llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll | 2 +- .../CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir | 2 +- .../CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir | 2 +- .../convergencectrl/AMDGPU/basic.mir | 2 +- .../convergencectrl/AMDGPU/cycles.mir | 2 +- .../convergencectrl/AMDGPU/mixed2.mir | 2 +- 
.../convergencectrl/AMDGPU/region-nesting.mir | 2 +- 38 files changed, 52 insertions(+), 52 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir index 8ec2778992e23c..bdfc7c2b25c28b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK --- name: test_neg_one_f16_sgpr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index b29ae366ca1ae5..e500aae7e0f3c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll index 6251dfdc392ebc..3037c1ec2829e5 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll index fe6d16bd8b5ead..086144873a042f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll index c80d7a6d9a836e..a6e1f5ef12b4bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s 
--check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll index c4edc5b72b2fbb..3aa81da317d67d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index e2831afe68e74b..6c232b680ebf56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void 
@test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll index c4d70fd5f0637f..717a4fc823d518 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll index 7e1d09805df3f6..1ef50cbd0fc7ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll index b6f1828dce2576..0bd255e5e1af49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll index 0d1871a18d4055..7399fa0a341e2a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll 
b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll index 0772f9d0199f22..2c69ae58f0e611 100644 --- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll +++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3), i32) declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32) diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll index 482f61624ec7df..15a696bb3af09f 100644 --- a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -1,12 +1,12 @@ -; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - 
%s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s -; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s ; GFX9-V5: gfx9-generic is only available on code object version 6 or better ; GFX101-V5: gfx10-1-generic is only available on code object version 6 or better diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll index 6c324ddc654667..c69207c0472e7c 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DAG %s -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck 
-check-prefixes=GCN,GISEL %s define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) { ; GCN-LABEL: test_s_load_i8: diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir index 993933b2b5c723..28d79efc00b0db 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s --- name: test diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll index 11bafa197a2f09..9b63a8a3efcf92 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { ; GFX12-LABEL: raw_buffer_atomic_cond_sub_return: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll index 02e27152bf5c59..8ea10f4496a2ef 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) { ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll index 8f1e6f3ac1a0c3..b4415c12926ac3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W32 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W32 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W32 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W32 %s declare <2 x i32> @llvm.amdgcn.global.load.tr.v2i32.p1(ptr addrspace(1)) declare <8 x i16> @llvm.amdgcn.global.load.tr.v8i16.p1(ptr addrspace(1)) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll index d5a45fb838fc7f..7ad1416789de79 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W64 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W64 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W64 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W64 %s declare i32 @llvm.amdgcn.global.load.tr.i32.p1(ptr addrspace(1)) declare <4 x i16> @llvm.amdgcn.global.load.tr.v4i16.p1(ptr addrspace(1)) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll index f03dbb9eb16457..ff8f28dae3f8ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 define amdgpu_ps void @test_bvhcnt() { ; GFX12-LABEL: test_bvhcnt: diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir index f50688240fe8bd..ed57caadea5c56 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir +++ 
b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s --- | define amdgpu_kernel void @spill_regpressure_less() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll index e6fbe97f8dc0a5..96fa2a45a2ddf0 100644 --- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll +++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; GFX12-LABEL: intrinsic_store_system_scope: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index 5f662ac088a351..cb3d76cd9c0ba1 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll index c80e5e0e3506cc..c4adc8c3212801 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll index 5426458e6b1df9..dbb4db05a35c5d 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll 
b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll index b0213abba90485..009288dbdf530a 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll index a66747567dd3d5..1012287838f120 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index 1e82e74d92c4ed..ab1121a705529d 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 
-mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll index 19b0e697183f4d..462fc01e8e79e2 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll index fa5eb3605e67a3..161d222d10ff70 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr 
addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll index 861eb1aaa333ab..511a116a78be59 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll index a05a8f4117ecee..5fde11cb4b1b14 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir index 
47a1e06c5d7dc5..ef85de20129434 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir +++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0. # $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0 diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir index 34c37aa91ab80f..277db33e940dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir +++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0. 
# $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0 diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir index 94d0ddad25944d..cb06d90ccd7fdf 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: basic tracksRegLiveness: true diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir index 87cf3e604929bb..d935d8ea4be506 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: cycles body: | diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir index c70a48bf21309e..7893837126e799 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: mixed2 body: | diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir index 9e869acb3e9381..e9588d25d774d2 
100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: region_nesting body: | From b565126b4dbef0d9f5c9f3ef8f3489ff6581218f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 8 Mar 2024 04:24:28 +0100 Subject: [PATCH 151/158] Revert "update_test_checks: keep meta variables stable by default" This reverts commit fb02f9ac84a6151e41aba8f7391edd132a9aaf14. Looks like some Python version incompatibility, will investigate. --- .../Inputs/stable_ir_values.ll.expected | 10 +- .../Inputs/stable_ir_values.ll.expected.reset | 23 - .../Inputs/stable_ir_values2.ll.expected | 12 +- .../Inputs/stable_ir_values3.ll | 3 + .../Inputs/stable_ir_values3.ll.expected | 3 + .../Inputs/stable_ir_values4.ll.expected | 8 +- .../update_test_checks/stable_ir_values.test | 3 - llvm/utils/UpdateTestChecks/common.py | 513 +----------------- llvm/utils/update_test_checks.py | 22 +- 9 files changed, 43 insertions(+), 554 deletions(-) delete mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected index 3549a4d76aa762..5142e3ed32ba45 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected @@ -9,11 +9,11 @@ define i32 @func({i32, i32} %x, i32 %y) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: 
[[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[Y]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[TMP3]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] ; %x.i34 = extractvalue {i32, i32} %x, 0 %1 = add i32 %y, 1 diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset deleted file mode 100644 index 5142e3ed32ba45..00000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset +++ /dev/null @@ -1,23 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt < %s -S | FileCheck %s - -; The assumption underlying this test is that there are pre-existing check lines -; but something has changed, and we would like to avoid needless changes of -; meta variable names so that diffs end up being easier to read, e.g. avoid -; changing X_I33 into X_I34 or renumbering the various TMP variables. 
- -define i32 @func({i32, i32} %x, i32 %y) { -; CHECK-LABEL: define i32 @func( -; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 -; CHECK-NEXT: ret i32 [[TMP3]] -; - %x.i34 = extractvalue {i32, i32} %x, 0 - %1 = add i32 %y, 1 - %2 = add i32 %x.i34, %1 - %3 = mul i32 %2, 3 - ret i32 %3 -} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected index 6311a55a1f9de1..53f60bda8ee591 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected @@ -5,12 +5,12 @@ define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i1 [[TMP1]]) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] -; CHECK-NEXT: ret i32 [[TMP10]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP6]] ; %1 = icmp eq i32 %x, 0 %2 = call i32 @foo(i1 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll index 
a4f4fc67f78d3f..3b449291d0e7f8 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s +; Test that we don't regress diff quality by trying to keep variable names +; stable (and messing up the matching). + define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected index 08d3c22172ee3f..3d0f772505a659 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s +; Test that we don't regress diff quality by trying to keep variable names +; stable (and messing up the matching). 
+ define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected index e3fa51598c48e3..5962bdafd9ea0a 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected @@ -15,12 +15,12 @@ define i32 @func(i32 %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP13]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) -; CHECK-NEXT: ret i32 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP12]]) +; CHECK-NEXT: ret i32 [[TMP13]] ; %1 = mul i32 %x, 3 %2 = call i32 @foo(i32 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test index 4dfaf5d25c8a69..c6287a6b29ca92 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test @@ -1,5 +1,2 @@ # RUN: cp -f %S/Inputs/stable_ir_values.ll %t.ll && %update_test_checks %t.ll # RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected -# Now test that we can reset all the names -# RUN: %update_test_checks %t.ll --reset-variable-names -# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected.reset diff --git 
a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index f766d541c79c02..a3365fef5f6e7d 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -1,8 +1,6 @@ from __future__ import print_function import argparse -import bisect -import collections import copy import glob import itertools @@ -12,7 +10,7 @@ import sys import shlex -from typing import List, Mapping, Set +from typing import List ##### Common utilities for update_*test_checks.py @@ -422,48 +420,6 @@ def should_add_line_to_output( return True -def collect_original_check_lines(ti: TestInfo, prefix_set: set): - """ - Collect pre-existing check lines into a dictionary `result` which is - returned. - - result[func_name][prefix] is filled with a list of right-hand-sides of check - lines. - """ - result = {} - - current_function = None - for input_line_info in ti.ro_iterlines(): - input_line = input_line_info.line - if current_function is not None: - if input_line == "": - continue - if input_line.lstrip().startswith(";"): - m = CHECK_RE.match(input_line) - if ( - m is not None - and m.group(1) in prefix_set - and m.group(2) not in ["LABEL", "SAME"] - ): - if m.group(1) not in current_function: - current_function[m.group(1)] = [] - current_function[m.group(1)].append(input_line[m.end() :].strip()) - continue - current_function = None - - m = IR_FUNCTION_RE.match(input_line) - if m is not None: - func_name = m.group(1) - if ti.args.function is not None and func_name != ti.args.function: - # When filtering on a specific function, skip all others. 
- continue - - assert func_name not in result - current_function = result[func_name] = {} - - return result - - # Perform lit-like substitutions def getSubstitutions(sourcepath): sourcedir = os.path.dirname(sourcepath) @@ -535,7 +491,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") CHECK_RE = re.compile( - r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" + r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:" ) CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") @@ -1231,325 +1187,20 @@ def may_clash_with_default_check_prefix_name(check_prefix, var): ) -def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: - """ - Find a large ordered matching between strings in lhs and rhs. - - Think of this as finding the *unchanged* lines in a diff, where the entries - of lhs and rhs are lines of the files being diffed. - - Returns a list of matched (lhs_idx, rhs_idx) pairs. - """ - - if not lhs or not rhs: - return [] - - # Collect matches in reverse order. - matches = [] - - # First, collect a set of candidate matching edges. We limit this to a - # constant multiple of the input size to avoid quadratic runtime. 
- patterns = collections.defaultdict(lambda: ([], [])) - - for idx in range(len(lhs)): - patterns[lhs[idx]][0].append(idx) - for idx in range(len(rhs)): - patterns[rhs[idx]][1].append(idx) - - multiple_patterns = [] - - candidates = [] - for pattern in patterns.values(): - if not pattern[0] or not pattern[1]: - continue - - if len(pattern[0]) == len(pattern[1]) == 1: - candidates.append((pattern[0][0], pattern[1][0])) - else: - multiple_patterns.append(pattern) - - multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) - - for pattern in multiple_patterns: - if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( - len(lhs) + len(rhs) - ): - break - for lhs_idx in pattern[0]: - for rhs_idx in pattern[1]: - candidates.append((lhs_idx, rhs_idx)) - - if not candidates: - # The LHS and RHS either share nothing in common, or lines are just too - # identical. In that case, let's give up and not match anything. - return [] - - # Compute a maximal crossing-free matching via an algorithm that is - # inspired by a mixture of dynamic programming and line-sweeping in - # discrete geometry. - # - # I would be surprised if this algorithm didn't exist somewhere in the - # literature, but I found it without consciously recalling any - # references, so you'll have to make do with the explanation below. - # Sorry. - # - # The underlying graph is bipartite: - # - nodes on the LHS represent lines in the original check - # - nodes on the RHS represent lines in the new (updated) check - # - # Nodes are implicitly sorted by the corresponding line number. - # Edges (unique_matches) are sorted by the line number on the LHS. - # - # Here's the geometric intuition for the algorithm. - # - # * Plot the edges as points in the plane, with the original line - # number on the X axis and the updated line number on the Y axis. - # * The goal is to find a longest "chain" of points where each point - # is strictly above and to the right of the previous point. 
- # * The algorithm proceeds by sweeping a vertical line from left to - # right. - # * The algorithm maintains a table where `table[N]` answers the - # question "What is currently the 'best' way to build a chain of N+1 - # points to the left of the vertical line". Here, 'best' means - # that the last point of the chain is a as low as possible (minimal - # Y coordinate). - # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of - # the last point in the chain and `y` is its Y coordinate - # * A key invariant is that the Y values in the table are - # monotonically increasing - # * Thanks to these properties, the table can be used to answer the - # question "What is the longest chain that can be built to the left - # of the vertical line using only points below a certain Y value", - # using a binary search over the table. - # * The algorithm also builds a backlink structure in which every point - # links back to the previous point on a best (longest) chain ending - # at that point - # - # The core loop of the algorithm sweeps the line and updates the table - # and backlink structure for every point that we cross during the sweep. - # Therefore, the algorithm is trivially O(M log M) in the number of - # points. - candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) - - backlinks = [] - table = [] - for _, rhs_idx in candidates: - candidate_idx = len(backlinks) - ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0]) - - # Update the table to record a best chain ending in the current point. - # There always is one, and if any of the previously visited points had - # a higher Y coordinate, then there is always a previously recorded best - # chain that can be improved upon by using the current point. - # - # There is only one case where there is some ambiguity. 
If the - # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as - # the current point (this can only happen if the same line appeared - # multiple times on the LHS), then we could choose to keep the - # previously recorded best chain instead. That would bias the algorithm - # differently but should have no systematic impact on the quality of the - # result. - if ti < len(table): - table[ti] = (rhs_idx, candidate_idx) - else: - table.append((rhs_idx, candidate_idx)) - if ti > 0: - backlinks.append(table[ti - 1][1]) - else: - backlinks.append(None) - - # Commit to names in the matching by walking the backlinks. Recursively - # attempt to fill in more matches in-betweem. - match_idx = table[-1][1] - while match_idx is not None: - current = candidates[match_idx] - matches.append(current) - match_idx = backlinks[match_idx] - - matches.reverse() - return matches - - -VARIABLE_TAG = "[[@@]]" -METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") -NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") - - -class CheckValueInfo: - def __init__( - self, - nameless_value: NamelessValue, - var: str, - prefix: str, - ): - self.nameless_value = nameless_value - self.var = var - self.prefix = prefix - - -# Represent a check line in a way that allows us to compare check lines while -# ignoring some or all of the FileCheck variable names. 
-class CheckLineInfo: - def __init__(self, line, values): - # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG - self.line: str = line - - # Information on each FileCheck variable name occurrences in the line - self.values: List[CheckValueInfo] = values - - def __repr__(self): - return f"CheckLineInfo(line={self.line}, self.values={self.values})" - - -def remap_metavar_names( - old_line_infos: List[CheckLineInfo], - new_line_infos: List[CheckLineInfo], - committed_names: Set[str], -) -> Mapping[str, str]: - """ - Map all FileCheck variable names that appear in new_line_infos to new - FileCheck variable names in an attempt to reduce the diff from old_line_infos - to new_line_infos. - - This is done by: - * Matching old check lines and new check lines using a diffing algorithm - applied after replacing names with wildcards. - * Committing to variable names such that the matched lines become equal - (without wildcards) if possible - * This is done recursively to handle cases where many lines are equal - after wildcard replacement - """ - # Initialize uncommitted identity mappings - new_mapping = {} - for line in new_line_infos: - for value in line.values: - new_mapping[value.var] = value.var - - # Recursively commit to the identity mapping or find a better one - def recurse(old_begin, old_end, new_begin, new_end): - if old_begin == old_end or new_begin == new_end: - return - - # Find a matching of lines where uncommitted names are replaced - # with a placeholder. 
- def diffify_line(line, mapper): - values = [] - for value in line.values: - mapped = mapper(value.var) - values.append(mapped if mapped in committed_names else "?") - return line.line.strip() + " @@@ " + " @ ".join(values) - - lhs_lines = [ - diffify_line(line, lambda x: x) - for line in old_line_infos[old_begin:old_end] - ] - rhs_lines = [ - diffify_line(line, lambda x: new_mapping[x]) - for line in new_line_infos[new_begin:new_end] - ] - - candidate_matches = find_diff_matching(lhs_lines, rhs_lines) - - # Apply commits greedily on a match-by-match basis - matches = [(-1, -1)] - committed_anything = False - for lhs_idx, rhs_idx in candidate_matches: - lhs_line = old_line_infos[lhs_idx] - rhs_line = new_line_infos[rhs_idx] - - local_commits = {} - - for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): - if new_mapping[rhs_value.var] in committed_names: - # The new value has already been committed. If it was mapped - # to the same name as the original value, we can consider - # committing other values from this line. Otherwise, we - # should ignore this line. - if new_mapping[rhs_value.var] == lhs_value.var: - continue - else: - break - - if rhs_value.var in local_commits: - # Same, but for a possible commit happening on the same line - if local_commits[rhs_value.var] == lhs_value.var: - continue - else: - break - - if lhs_value.var in committed_names: - # We can't map this value because the name we would map it to has already been - # committed for something else. Give up on this line. 
- break - - local_commits[rhs_value.var] = lhs_value.var - else: - # No reason not to add any commitments for this line - for rhs_var, lhs_var in local_commits.items(): - new_mapping[rhs_var] = lhs_var - committed_names.add(lhs_var) - committed_anything = True - - if ( - lhs_var != rhs_var - and lhs_var in new_mapping - and new_mapping[lhs_var] == lhs_var - ): - new_mapping[lhs_var] = "conflict_" + lhs_var - - matches.append((lhs_idx, rhs_idx)) - - matches.append((old_end, new_end)) - - # Recursively handle sequences between matches - if committed_anything: - for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): - recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) - - recurse(0, len(old_line_infos), 0, len(new_line_infos)) - - # Commit to remaining names and resolve conflicts - for new_name, mapped_name in new_mapping.items(): - if mapped_name in committed_names: - continue - if not mapped_name.startswith("conflict_"): - assert mapped_name == new_name - committed_names.add(mapped_name) - - for new_name, mapped_name in new_mapping.items(): - if mapped_name in committed_names: - continue - assert mapped_name.startswith("conflict_") - - m = NUMERIC_SUFFIX_RE.search(new_name) - base_name = new_name[: m.start()] - suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 - while True: - candidate = f"{base_name}{suffix}" - if candidate not in committed_names: - new_mapping[new_name] = candidate - committed_names.add(candidate) - break - suffix += 1 - - return new_mapping - - def generalize_check_lines_common( lines, is_analyze, vars_seen, global_vars_seen, nameless_values, - nameless_value_regex: re.Pattern, + nameless_value_regex, is_asm, preserve_names, - original_check_lines=None, ): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. 
- def transform_line_vars(match, transform_locals=True): + def transform_line_vars(match): var = get_name_from_ir_value_match(match) nameless_value = get_nameless_value_from_match(match, nameless_values) if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var): @@ -1559,8 +1210,6 @@ def transform_line_vars(match, transform_locals=True): ) key = (var, nameless_value.check_key) is_local_def = nameless_value.is_local_def_ir_value() - if is_local_def and not transform_locals: - return None if is_local_def and key in vars_seen: rv = nameless_value.get_value_use(var, match) elif not is_local_def and key in global_vars_seen: @@ -1579,15 +1228,13 @@ def transform_line_vars(match, transform_locals=True): # including the commas and spaces. return match.group(1) + rv + match.group(match.lastindex) - def transform_non_local_line_vars(match): - return transform_line_vars(match, False) - + lines_with_def = [] multiple_braces_re = re.compile(r"({{+)|(}}+)") def escape_braces(match_obj): return '{{' + re.escape(match_obj.group(0)) + '}}' - if not is_asm and not is_analyze: - for i, line in enumerate(lines): + for i, line in enumerate(lines): + if not is_asm and not is_analyze: # An IR variable named '%.' matches the FileCheck regex string. line = line.replace("%.", "%dot") for regex in _global_hex_value_regex: @@ -1605,136 +1252,25 @@ def escape_braces(match_obj): # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) lines[i] = scrubbed_line - - if not preserve_names: - if is_asm: - for i, _ in enumerate(lines): - # It can happen that two matches are back-to-back and for some reason sub - # will not replace both of them. For now we work around this by - # substituting until there is no more match. - changed = True - while changed: - (lines[i], changed) = nameless_value_regex.subn( - transform_line_vars, lines[i], count=1 - ) - else: - # LLVM IR case. 
Start by handling global meta variables (global IR variables, - # metadata, attributes) - for i, _ in enumerate(lines): - start = 0 - while True: - m = nameless_value_regex.search(lines[i][start:]) - if m is None: - break - start += m.start() - sub = transform_non_local_line_vars(m) - if sub is not None: - lines[i] = ( - lines[i][:start] + sub + lines[i][start + len(m.group(0)) :] - ) - start += 1 - - # Collect information about new check lines and original check lines (if any) - new_line_infos = [] - for line in lines: - filtered_line = "" - values = [] - while True: - m = nameless_value_regex.search(line) - if m is None: - filtered_line += line - break - - var = get_name_from_ir_value_match(m) - nameless_value = get_nameless_value_from_match(m, nameless_values) - var = nameless_value.get_value_name( - var, nameless_value.check_prefix - ) - - # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. - filtered_line += ( - line[: m.start()] - + m.group(1) - + VARIABLE_TAG - + m.group(m.lastindex) - ) - line = line[m.end() :] - values.append( - CheckValueInfo( - nameless_value=nameless_value, - var=var, - prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[ - 0 - ], - ) - ) - new_line_infos.append(CheckLineInfo(filtered_line, values)) - - orig_line_infos = [] - for line in original_check_lines or []: - filtered_line = "" - values = [] - while True: - m = METAVAR_RE.search(line) - if m is None: - filtered_line += line - break - - # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
- filtered_line += line[: m.start()] + VARIABLE_TAG - line = line[m.end() :] - values.append( - CheckValueInfo( - nameless_value=None, - var=m.group(1), - prefix=None, - ) - ) - orig_line_infos.append(CheckLineInfo(filtered_line, values)) - - # Compute the variable name mapping - committed_names = set(vars_seen) - - mapping = remap_metavar_names( - orig_line_infos, new_line_infos, committed_names - ) - - for i, line_info in enumerate(new_line_infos): - line_template = line_info.line - line = "" - - for value in line_info.values: - idx = line_template.find(VARIABLE_TAG) - line += line_template[:idx] - line_template = line_template[idx + len(VARIABLE_TAG) :] - - key = (mapping[value.var], nameless_value.check_key) - is_local_def = nameless_value.is_local_def_ir_value() - if is_local_def: - if mapping[value.var] in vars_seen: - line += f"[[{mapping[value.var]}]]" - else: - line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]" - vars_seen.add(mapping[value.var]) - else: - raise RuntimeError("not implemented") - - line += line_template - - lines[i] = line - - if is_analyze: - for i, _ in enumerate(lines): + if not preserve_names: + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = nameless_value_regex.subn( + transform_line_vars, lines[i], count=1 + ) + if is_analyze: # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) lines[i] = scrubbed_line - return lines # Replace IR value defs and uses with FileCheck variables. 
def generalize_check_lines( - lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines + lines, is_analyze, vars_seen, global_vars_seen, preserve_names ): return generalize_check_lines_common( lines, @@ -1745,7 +1281,6 @@ def generalize_check_lines( IR_VALUE_RE, False, preserve_names, - original_check_lines=original_check_lines, ) @@ -1802,7 +1337,6 @@ def add_checks( global_vars_seen_dict, is_filtered, preserve_names=False, - original_check_lines: Mapping[str, List[str]] = {}, ): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() @@ -1875,7 +1409,6 @@ def add_checks( vars_seen, global_vars_seen, preserve_names, - original_check_lines=[], )[0] func_name_separator = func_dict[checkprefix][func_name].func_name_separator if "[[" in args_and_sig: @@ -1983,12 +1516,7 @@ def add_checks( # to variable naming fashions. else: func_body = generalize_check_lines( - func_body, - False, - vars_seen, - global_vars_seen, - preserve_names, - original_check_lines=original_check_lines.get(checkprefix), + func_body, False, vars_seen, global_vars_seen, preserve_names ) # This could be selectively enabled with an optional invocation argument. @@ -2050,7 +1578,6 @@ def add_ir_checks( version, global_vars_seen_dict, is_filtered, - original_check_lines={}, ): # Label format is based on IR string. 
if function_sig and version > 1: @@ -2075,7 +1602,6 @@ def add_ir_checks( global_vars_seen_dict, is_filtered, preserve_names, - original_check_lines=original_check_lines, ) @@ -2364,7 +1890,6 @@ def get_autogennote_suffix(parser, args): "llvm_bin", "verbose", "force_update", - "reset_variable_names", ): continue value = getattr(args, action.dest) diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index 04808ce6bb1c6f..b5077d79351378 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -85,12 +85,6 @@ def main(): choices=["none", "smart", "all"], help="Check global entries (global variables, metadata, attribute sets, ...) for functions", ) - parser.add_argument( - "--reset-variable-names", - action="store_true", - help="Reset all variable names to correspond closely to the variable names in IR. " - "This tends to result in larger diffs.", - ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) @@ -176,19 +170,13 @@ def main(): ) builder.processed_prefixes(prefixes) - prefix_set = set( - [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] - ) - - if not ti.args.reset_variable_names: - original_check_lines = common.collect_original_check_lines(ti, prefix_set) - else: - original_check_lines = {} - func_dict = builder.finish_and_get_func_dict() is_in_function = False is_in_function_start = False has_checked_pre_function_globals = False + prefix_set = set( + [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] + ) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -242,7 +230,6 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), - original_check_lines=original_check_lines.get(func, {}), ), ) ) @@ -274,9 +261,6 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), - original_check_lines=original_check_lines.get( - func_name, {} - ), ) ) 
is_in_function_start = False From 3846019d8f6379ea1a8bf3a0fdfb0202de8e2f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Nov 2023 06:46:07 +0100 Subject: [PATCH 152/158] update_test_checks: keep meta variables stable by default Resubmitting this after previous revert with the following changes: - Split table into table_rhs_idx and table_candidate_idx so that bisect.bisect_left can be used without the `key` argument, which was introduced in Python 3.10 - Remove a re.Pattern type annotation Original commit message: Prior to this change, running UTC on larger tests, especially tests with unnamed IR values, often resulted in a spuriously large diff because e.g. TMPnn variables in the CHECK lines were renumbered. This change attempts to reduce the diff by keeping those variable names the same. There are cases in which this "drift" of variable names can end up being more confusing. The old behavior can be re-enabled with the --reset-variable-names command line argument. The improvement may not be immediately apparent in the diff of this change. The point is that the diff of stable_ir_values.ll against stable_ir_values.ll.expected after this change is smaller. Ideally, we'd also keep meta variables for "global" objects stable, e.g. for attributes (#nn) and metadata (!nn). However, that would require a much more substantial refactoring of how we generate check lines, so I left it for future work. 
--- .../Inputs/stable_ir_values.ll.expected | 10 +- .../Inputs/stable_ir_values.ll.expected.reset | 23 + .../Inputs/stable_ir_values2.ll.expected | 12 +- .../Inputs/stable_ir_values3.ll | 3 - .../Inputs/stable_ir_values3.ll.expected | 3 - .../Inputs/stable_ir_values4.ll.expected | 8 +- .../update_test_checks/stable_ir_values.test | 3 + llvm/utils/UpdateTestChecks/common.py | 514 +++++++++++++++++- llvm/utils/update_test_checks.py | 22 +- 9 files changed, 556 insertions(+), 42 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected index 5142e3ed32ba45..3549a4d76aa762 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected @@ -9,11 +9,11 @@ define i32 @func({i32, i32} %x, i32 %y) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] ; %x.i34 = extractvalue {i32, i32} %x, 0 %1 = add i32 %y, 1 diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset new file mode 100644 index 
00000000000000..5142e3ed32ba45 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. + +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected index 53f60bda8ee591..6311a55a1f9de1 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected @@ -5,12 +5,12 @@ define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 
[[TMP2]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] ; %1 = icmp eq i32 %x, 0 %2 = call i32 @foo(i1 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll index 3b449291d0e7f8..a4f4fc67f78d3f 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s -; Test that we don't regress diff quality by trying to keep variable names -; stable (and messing up the matching). - define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected index 3d0f772505a659..08d3c22172ee3f 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -S | FileCheck %s -; Test that we don't regress diff quality by trying to keep variable names -; stable (and messing up the matching). 
- define i32 @func(i32 %x) { ; CHECK-LABEL: define i32 @func( ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected index 5962bdafd9ea0a..e3fa51598c48e3 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected @@ -15,12 +15,12 @@ define i32 @func(i32 %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP12]]) -; CHECK-NEXT: ret i32 [[TMP13]] +; CHECK-NEXT: ret i32 [[TMP12]] ; %1 = mul i32 %x, 3 %2 = call i32 @foo(i32 %1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test index c6287a6b29ca92..4dfaf5d25c8a69 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test @@ -1,2 +1,5 @@ # RUN: cp -f %S/Inputs/stable_ir_values.ll %t.ll && %update_test_checks %t.ll # RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected +# Now test that we can reset all the names +# RUN: %update_test_checks %t.ll --reset-variable-names +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected.reset diff --git 
a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index a3365fef5f6e7d..ecb19d233a8d1a 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -1,6 +1,8 @@ from __future__ import print_function import argparse +import bisect +import collections import copy import glob import itertools @@ -10,7 +12,7 @@ import sys import shlex -from typing import List +from typing import List, Mapping, Set ##### Common utilities for update_*test_checks.py @@ -420,6 +422,48 @@ def should_add_line_to_output( return True +def collect_original_check_lines(ti: TestInfo, prefix_set: set): + """ + Collect pre-existing check lines into a dictionary `result` which is + returned. + + result[func_name][prefix] is filled with a list of right-hand-sides of check + lines. + """ + result = {} + + current_function = None + for input_line_info in ti.ro_iterlines(): + input_line = input_line_info.line + if current_function is not None: + if input_line == "": + continue + if input_line.lstrip().startswith(";"): + m = CHECK_RE.match(input_line) + if ( + m is not None + and m.group(1) in prefix_set + and m.group(2) not in ["LABEL", "SAME"] + ): + if m.group(1) not in current_function: + current_function[m.group(1)] = [] + current_function[m.group(1)].append(input_line[m.end() :].strip()) + continue + current_function = None + + m = IR_FUNCTION_RE.match(input_line) + if m is not None: + func_name = m.group(1) + if ti.args.function is not None and func_name != ti.args.function: + # When filtering on a specific function, skip all others. 
+ continue + + assert func_name not in result + current_function = result[func_name] = {} + + return result + + # Perform lit-like substitutions def getSubstitutions(sourcepath): sourcedir = os.path.dirname(sourcepath) @@ -491,7 +535,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") CHECK_RE = re.compile( - r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:" + r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" ) CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") @@ -1187,6 +1231,313 @@ def may_clash_with_default_check_prefix_name(check_prefix, var): ) +def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: + """ + Find a large ordered matching between strings in lhs and rhs. + + Think of this as finding the *unchanged* lines in a diff, where the entries + of lhs and rhs are lines of the files being diffed. + + Returns a list of matched (lhs_idx, rhs_idx) pairs. + """ + + if not lhs or not rhs: + return [] + + # Collect matches in reverse order. + matches = [] + + # First, collect a set of candidate matching edges. We limit this to a + # constant multiple of the input size to avoid quadratic runtime. 
+ patterns = collections.defaultdict(lambda: ([], [])) + + for idx in range(len(lhs)): + patterns[lhs[idx]][0].append(idx) + for idx in range(len(rhs)): + patterns[rhs[idx]][1].append(idx) + + multiple_patterns = [] + + candidates = [] + for pattern in patterns.values(): + if not pattern[0] or not pattern[1]: + continue + + if len(pattern[0]) == len(pattern[1]) == 1: + candidates.append((pattern[0][0], pattern[1][0])) + else: + multiple_patterns.append(pattern) + + multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) + + for pattern in multiple_patterns: + if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( + len(lhs) + len(rhs) + ): + break + for lhs_idx in pattern[0]: + for rhs_idx in pattern[1]: + candidates.append((lhs_idx, rhs_idx)) + + if not candidates: + # The LHS and RHS either share nothing in common, or lines are just too + # identical. In that case, let's give up and not match anything. + return [] + + # Compute a maximal crossing-free matching via an algorithm that is + # inspired by a mixture of dynamic programming and line-sweeping in + # discrete geometry. + # + # I would be surprised if this algorithm didn't exist somewhere in the + # literature, but I found it without consciously recalling any + # references, so you'll have to make do with the explanation below. + # Sorry. + # + # The underlying graph is bipartite: + # - nodes on the LHS represent lines in the original check + # - nodes on the RHS represent lines in the new (updated) check + # + # Nodes are implicitly sorted by the corresponding line number. + # Edges (unique_matches) are sorted by the line number on the LHS. + # + # Here's the geometric intuition for the algorithm. + # + # * Plot the edges as points in the plane, with the original line + # number on the X axis and the updated line number on the Y axis. + # * The goal is to find a longest "chain" of points where each point + # is strictly above and to the right of the previous point. 
+ # * The algorithm proceeds by sweeping a vertical line from left to + # right. + # * The algorithm maintains a table where `table[N]` answers the + # question "What is currently the 'best' way to build a chain of N+1 + # points to the left of the vertical line". Here, 'best' means + # that the last point of the chain is a as low as possible (minimal + # Y coordinate). + # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of + # the last point in the chain and `y` is its Y coordinate + # * A key invariant is that the Y values in the table are + # monotonically increasing + # * Thanks to these properties, the table can be used to answer the + # question "What is the longest chain that can be built to the left + # of the vertical line using only points below a certain Y value", + # using a binary search over the table. + # * The algorithm also builds a backlink structure in which every point + # links back to the previous point on a best (longest) chain ending + # at that point + # + # The core loop of the algorithm sweeps the line and updates the table + # and backlink structure for every point that we cross during the sweep. + # Therefore, the algorithm is trivially O(M log M) in the number of + # points. + candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) + + backlinks = [] + table_rhs_idx = [] + table_candidate_idx = [] + for _, rhs_idx in candidates: + candidate_idx = len(backlinks) + ti = bisect.bisect_left(table_rhs_idx, rhs_idx) + + # Update the table to record a best chain ending in the current point. + # There always is one, and if any of the previously visited points had + # a higher Y coordinate, then there is always a previously recorded best + # chain that can be improved upon by using the current point. + # + # There is only one case where there is some ambiguity. 
If the + # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as + # the current point (this can only happen if the same line appeared + # multiple times on the LHS), then we could choose to keep the + # previously recorded best chain instead. That would bias the algorithm + # differently but should have no systematic impact on the quality of the + # result. + if ti < len(table_rhs_idx): + table_rhs_idx[ti] = rhs_idx + table_candidate_idx[ti] = candidate_idx + else: + table_rhs_idx.append(rhs_idx) + table_candidate_idx.append(candidate_idx) + if ti > 0: + backlinks.append(table_candidate_idx[ti - 1]) + else: + backlinks.append(None) + + # Commit to names in the matching by walking the backlinks. Recursively + # attempt to fill in more matches in-betweem. + match_idx = table_candidate_idx[-1] + while match_idx is not None: + current = candidates[match_idx] + matches.append(current) + match_idx = backlinks[match_idx] + + matches.reverse() + return matches + + +VARIABLE_TAG = "[[@@]]" +METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") +NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") + + +class CheckValueInfo: + def __init__( + self, + nameless_value: NamelessValue, + var: str, + prefix: str, + ): + self.nameless_value = nameless_value + self.var = var + self.prefix = prefix + + +# Represent a check line in a way that allows us to compare check lines while +# ignoring some or all of the FileCheck variable names. 
+class CheckLineInfo: + def __init__(self, line, values): + # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG + self.line: str = line + + # Information on each FileCheck variable name occurrences in the line + self.values: List[CheckValueInfo] = values + + def __repr__(self): + return f"CheckLineInfo(line={self.line}, self.values={self.values})" + + +def remap_metavar_names( + old_line_infos: List[CheckLineInfo], + new_line_infos: List[CheckLineInfo], + committed_names: Set[str], +) -> Mapping[str, str]: + """ + Map all FileCheck variable names that appear in new_line_infos to new + FileCheck variable names in an attempt to reduce the diff from old_line_infos + to new_line_infos. + + This is done by: + * Matching old check lines and new check lines using a diffing algorithm + applied after replacing names with wildcards. + * Committing to variable names such that the matched lines become equal + (without wildcards) if possible + * This is done recursively to handle cases where many lines are equal + after wildcard replacement + """ + # Initialize uncommitted identity mappings + new_mapping = {} + for line in new_line_infos: + for value in line.values: + new_mapping[value.var] = value.var + + # Recursively commit to the identity mapping or find a better one + def recurse(old_begin, old_end, new_begin, new_end): + if old_begin == old_end or new_begin == new_end: + return + + # Find a matching of lines where uncommitted names are replaced + # with a placeholder. 
+ def diffify_line(line, mapper): + values = [] + for value in line.values: + mapped = mapper(value.var) + values.append(mapped if mapped in committed_names else "?") + return line.line.strip() + " @@@ " + " @ ".join(values) + + lhs_lines = [ + diffify_line(line, lambda x: x) + for line in old_line_infos[old_begin:old_end] + ] + rhs_lines = [ + diffify_line(line, lambda x: new_mapping[x]) + for line in new_line_infos[new_begin:new_end] + ] + + candidate_matches = find_diff_matching(lhs_lines, rhs_lines) + + # Apply commits greedily on a match-by-match basis + matches = [(-1, -1)] + committed_anything = False + for lhs_idx, rhs_idx in candidate_matches: + lhs_line = old_line_infos[lhs_idx] + rhs_line = new_line_infos[rhs_idx] + + local_commits = {} + + for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): + if new_mapping[rhs_value.var] in committed_names: + # The new value has already been committed. If it was mapped + # to the same name as the original value, we can consider + # committing other values from this line. Otherwise, we + # should ignore this line. + if new_mapping[rhs_value.var] == lhs_value.var: + continue + else: + break + + if rhs_value.var in local_commits: + # Same, but for a possible commit happening on the same line + if local_commits[rhs_value.var] == lhs_value.var: + continue + else: + break + + if lhs_value.var in committed_names: + # We can't map this value because the name we would map it to has already been + # committed for something else. Give up on this line. 
+ break + + local_commits[rhs_value.var] = lhs_value.var + else: + # No reason not to add any commitments for this line + for rhs_var, lhs_var in local_commits.items(): + new_mapping[rhs_var] = lhs_var + committed_names.add(lhs_var) + committed_anything = True + + if ( + lhs_var != rhs_var + and lhs_var in new_mapping + and new_mapping[lhs_var] == lhs_var + ): + new_mapping[lhs_var] = "conflict_" + lhs_var + + matches.append((lhs_idx, rhs_idx)) + + matches.append((old_end, new_end)) + + # Recursively handle sequences between matches + if committed_anything: + for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): + recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) + + recurse(0, len(old_line_infos), 0, len(new_line_infos)) + + # Commit to remaining names and resolve conflicts + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + if not mapped_name.startswith("conflict_"): + assert mapped_name == new_name + committed_names.add(mapped_name) + + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + assert mapped_name.startswith("conflict_") + + m = NUMERIC_SUFFIX_RE.search(new_name) + base_name = new_name[: m.start()] + suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 + while True: + candidate = f"{base_name}{suffix}" + if candidate not in committed_names: + new_mapping[new_name] = candidate + committed_names.add(candidate) + break + suffix += 1 + + return new_mapping + + def generalize_check_lines_common( lines, is_analyze, @@ -1196,11 +1547,12 @@ def generalize_check_lines_common( nameless_value_regex, is_asm, preserve_names, + original_check_lines=None, ): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. 
- def transform_line_vars(match): + def transform_line_vars(match, transform_locals=True): var = get_name_from_ir_value_match(match) nameless_value = get_nameless_value_from_match(match, nameless_values) if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var): @@ -1210,6 +1562,8 @@ def transform_line_vars(match): ) key = (var, nameless_value.check_key) is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def and not transform_locals: + return None if is_local_def and key in vars_seen: rv = nameless_value.get_value_use(var, match) elif not is_local_def and key in global_vars_seen: @@ -1228,13 +1582,15 @@ def transform_line_vars(match): # including the commas and spaces. return match.group(1) + rv + match.group(match.lastindex) - lines_with_def = [] + def transform_non_local_line_vars(match): + return transform_line_vars(match, False) + multiple_braces_re = re.compile(r"({{+)|(}}+)") def escape_braces(match_obj): return '{{' + re.escape(match_obj.group(0)) + '}}' - for i, line in enumerate(lines): - if not is_asm and not is_analyze: + if not is_asm and not is_analyze: + for i, line in enumerate(lines): # An IR variable named '%.' matches the FileCheck regex string. line = line.replace("%.", "%dot") for regex in _global_hex_value_regex: @@ -1252,25 +1608,136 @@ def escape_braces(match_obj): # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) lines[i] = scrubbed_line - if not preserve_names: - # It can happen that two matches are back-to-back and for some reason sub - # will not replace both of them. For now we work around this by - # substituting until there is no more match. 
- changed = True - while changed: - (lines[i], changed) = nameless_value_regex.subn( - transform_line_vars, lines[i], count=1 - ) - if is_analyze: + + if not preserve_names: + if is_asm: + for i, _ in enumerate(lines): + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = nameless_value_regex.subn( + transform_line_vars, lines[i], count=1 + ) + else: + # LLVM IR case. Start by handling global meta variables (global IR variables, + # metadata, attributes) + for i, _ in enumerate(lines): + start = 0 + while True: + m = nameless_value_regex.search(lines[i][start:]) + if m is None: + break + start += m.start() + sub = transform_non_local_line_vars(m) + if sub is not None: + lines[i] = ( + lines[i][:start] + sub + lines[i][start + len(m.group(0)) :] + ) + start += 1 + + # Collect information about new check lines and original check lines (if any) + new_line_infos = [] + for line in lines: + filtered_line = "" + values = [] + while True: + m = nameless_value_regex.search(line) + if m is None: + filtered_line += line + break + + var = get_name_from_ir_value_match(m) + nameless_value = get_nameless_value_from_match(m, nameless_values) + var = nameless_value.get_value_name( + var, nameless_value.check_prefix + ) + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
+ filtered_line += ( + line[: m.start()] + + m.group(1) + + VARIABLE_TAG + + m.group(m.lastindex) + ) + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=nameless_value, + var=var, + prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[ + 0 + ], + ) + ) + new_line_infos.append(CheckLineInfo(filtered_line, values)) + + orig_line_infos = [] + for line in original_check_lines or []: + filtered_line = "" + values = [] + while True: + m = METAVAR_RE.search(line) + if m is None: + filtered_line += line + break + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. + filtered_line += line[: m.start()] + VARIABLE_TAG + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=None, + var=m.group(1), + prefix=None, + ) + ) + orig_line_infos.append(CheckLineInfo(filtered_line, values)) + + # Compute the variable name mapping + committed_names = set(vars_seen) + + mapping = remap_metavar_names( + orig_line_infos, new_line_infos, committed_names + ) + + for i, line_info in enumerate(new_line_infos): + line_template = line_info.line + line = "" + + for value in line_info.values: + idx = line_template.find(VARIABLE_TAG) + line += line_template[:idx] + line_template = line_template[idx + len(VARIABLE_TAG) :] + + key = (mapping[value.var], nameless_value.check_key) + is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def: + if mapping[value.var] in vars_seen: + line += f"[[{mapping[value.var]}]]" + else: + line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]" + vars_seen.add(mapping[value.var]) + else: + raise RuntimeError("not implemented") + + line += line_template + + lines[i] = line + + if is_analyze: + for i, _ in enumerate(lines): # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) lines[i] = scrubbed_line + return lines # Replace IR value defs and uses with FileCheck variables. 
def generalize_check_lines( - lines, is_analyze, vars_seen, global_vars_seen, preserve_names + lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines ): return generalize_check_lines_common( lines, @@ -1281,6 +1748,7 @@ def generalize_check_lines( IR_VALUE_RE, False, preserve_names, + original_check_lines=original_check_lines, ) @@ -1337,6 +1805,7 @@ def add_checks( global_vars_seen_dict, is_filtered, preserve_names=False, + original_check_lines: Mapping[str, List[str]] = {}, ): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() @@ -1409,6 +1878,7 @@ def add_checks( vars_seen, global_vars_seen, preserve_names, + original_check_lines=[], )[0] func_name_separator = func_dict[checkprefix][func_name].func_name_separator if "[[" in args_and_sig: @@ -1516,7 +1986,12 @@ def add_checks( # to variable naming fashions. else: func_body = generalize_check_lines( - func_body, False, vars_seen, global_vars_seen, preserve_names + func_body, + False, + vars_seen, + global_vars_seen, + preserve_names, + original_check_lines=original_check_lines.get(checkprefix), ) # This could be selectively enabled with an optional invocation argument. @@ -1578,6 +2053,7 @@ def add_ir_checks( version, global_vars_seen_dict, is_filtered, + original_check_lines={}, ): # Label format is based on IR string. 
if function_sig and version > 1: @@ -1602,6 +2078,7 @@ def add_ir_checks( global_vars_seen_dict, is_filtered, preserve_names, + original_check_lines=original_check_lines, ) @@ -1890,6 +2367,7 @@ def get_autogennote_suffix(parser, args): "llvm_bin", "verbose", "force_update", + "reset_variable_names", ): continue value = getattr(args, action.dest) diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index b5077d79351378..04808ce6bb1c6f 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -85,6 +85,12 @@ def main(): choices=["none", "smart", "all"], help="Check global entries (global variables, metadata, attribute sets, ...) for functions", ) + parser.add_argument( + "--reset-variable-names", + action="store_true", + help="Reset all variable names to correspond closely to the variable names in IR. " + "This tends to result in larger diffs.", + ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) @@ -170,13 +176,19 @@ def main(): ) builder.processed_prefixes(prefixes) + prefix_set = set( + [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] + ) + + if not ti.args.reset_variable_names: + original_check_lines = common.collect_original_check_lines(ti, prefix_set) + else: + original_check_lines = {} + func_dict = builder.finish_and_get_func_dict() is_in_function = False is_in_function_start = False has_checked_pre_function_globals = False - prefix_set = set( - [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] - ) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -230,6 +242,7 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get(func, {}), ), ) ) @@ -261,6 +274,9 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get( + func_name, {} + ), ) ) 
is_in_function_start = False From d9c855014123a313006adfc873be6f10e997be61 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Thu, 7 Mar 2024 19:52:28 -0800 Subject: [PATCH 153/158] [flang] Fixed build issues after f20ea05. (#84377) Older versions of clang do not have __builtin_complex, but they may define `__GNUC__`. --- flang/runtime/complex-reduction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c index 72c31ce08b875a..c91d1253991176 100644 --- a/flang/runtime/complex-reduction.c +++ b/flang/runtime/complex-reduction.c @@ -82,7 +82,8 @@ static long_double_Complex_t CMPLXL(long double r, long double i) { * supports __builtin_complex. For Clang, require >=12.0. * Otherwise, rely on the memory layout compatibility. */ -#if (defined(__clang_major__) && (__clang_major__ >= 12)) || defined(__GNUC__) +#if (defined(__clang_major__) && (__clang_major__ >= 12)) || \ + (defined(__GNUC__) && !defined(__clang__)) #define CMPLXF128 __builtin_complex #else static CFloat128ComplexType CMPLXF128(CFloat128Type r, CFloat128Type i) { From 23c658ac4183272221ef358575dca0d386096d36 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 8 Mar 2024 07:09:33 +0000 Subject: [PATCH 154/158] [AArch64] Ensure Neoverse V1 scheduling model includes all SVE pseudos. (#84187) With the many pseudos used in SVE codegen it can be too easy to miss instructions. This enables the existing test we have for checking the scheduling info of the pseudos matches the real instructions, and adjusts the scheduling info in the NeoverseV1 model to make sure all are handled. In the cases I could I opted to use the same info as in the NeoverseV2 model, to keep the differences smaller. 
--- .../Target/AArch64/AArch64SchedNeoverseV1.td | 105 ++++++++++-------- .../Target/AArch64/AArch64SchedNeoverseV2.td | 6 +- .../AArch64/AArch64SVESchedPseudoTest.cpp | 4 + 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index e50a401f8b2aec..c7dfd64b2fb24e 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -1372,18 +1372,18 @@ def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; // Arithmetic, basic // Logical def : InstRW<[V1Write_2c_1V01], - (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", - "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$", + (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]", + "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]", "^ADR_[SU]XTW_ZZZ_D_[0123]$", "^ADR_LSL_ZZZ_[SD]_[0123]$", - "^[SU]ABD_ZP[mZ]Z_[BHSD]$", - "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$", + "^[SU]ABD_ZP[mZ]Z_[BHSD]", + "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]", "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", - "^SUBR_Z(I|P[mZ]Z)_[BHSD]$", + "^SUBR_Z(I|P[mZ]Z)_[BHSD]", "^(AND|EOR|ORR)_ZI$", - "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", + "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ", "^EOR(BT|TB)_ZZZ_[BHSD]$", - "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>; // Arithmetic, shift def : InstRW<[V1Write_2c_1V1], @@ -1394,10 +1394,10 @@ def : InstRW<[V1Write_2c_1V1], "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; // Arithmetic, shift right for divide -def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>; +def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; // Count/reverse bits -def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; // Broadcast logical bitmask immediate to vector def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>; @@ -1420,10 +1420,10 @@ def : InstRW<[V1Write_3c_1V0], (instregex 
"^[SU]CVTF_ZPmZ_Dto[HSD]", "^[SU]CVTF_ZPmZ_StoD")>; // Convert to floating point, 32b to single or half -def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; +def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; // Convert to floating point, 16b to half -def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; +def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; // Copy, scalar def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; @@ -1432,10 +1432,12 @@ def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; // Divides, 32 bit -def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; +def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", + "^[SU]DIV_ZPZZ_S")>; // Divides, 64 bit -def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; +def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", + "^[SU]DIV_ZPZZ_D")>; // Dot product, 8 bit def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; @@ -1454,9 +1456,9 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; // Extend, sign or zero -def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", - "^[SU]XTH_ZPmZ_[SD]$", - "^[SU]XTW_ZPmZ_[D]$")>; +def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; // Extract def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; @@ -1489,18 +1491,22 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; // Multiply, B, H, S element size -def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$", - "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; +def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", + "^MUL_ZPZZ_[BHS]", + 
"^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", + "^[SU]MULH_ZPZZ_[BHS]")>; // Multiply, D element size // Multiply accumulate, D element size -def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$", - "^[SU]MULH_ZPmZ_D$", - "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>; +def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", + "^MUL_ZPZZ_D", + "^[SU]MULH_(ZPmZ|ZZZ)_D", + "^[SU]MULH_ZPZZ_D", + "^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>; // Multiply accumulate, B, H, S element size // NOTE: This is not specified in the SOG. -def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; +def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>; // Predicate counting vector def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; @@ -1547,12 +1553,17 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$", // ----------------------------------------------------------------------------- // Floating point absolute value/difference +def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FABD_ZPZZ_[HSD]", + "^FABS_ZPmZ_[HSD]")>; + // Floating point arithmetic -def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$", - "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", - "^FADDP_ZPmZZ_[HSD]$", - "^FNEG_ZPmZ_[HSD]$", - "^FSUBR_ZPm[IZ]_[HSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", + "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_ZPm[IZ]_[HSD]", + "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; // Floating point associative add, F16 def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; @@ -1577,40 +1588,44 @@ def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", // Floating point convert, long or narrow (F16 to F32 or F32 to F16) // Floating point convert to integer, F32 -def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", - "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; +def : InstRW<[V1Write_4c_2V0], (instregex 
"^FCVT_ZPmZ_(HtoS|StoH)", + "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) // Floating point convert to integer, F64 -def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", - "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", + "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; // Floating point convert to integer, F16 -def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; +def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; // Floating point copy def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", "^FDUP_ZI_[HSD]$")>; // Floating point divide, F16 -def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; +def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; // Floating point divide, F32 -def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; +def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; // Floating point divide, F64 -def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; +def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; // Floating point min/max -def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", + "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; // Floating point multiply -def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$", - "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; +def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", + "^FMULX_ZPZZ_[HSD]", + "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", + "^FMUL_ZPZ[IZ]_[HSD]")>; // Floating point multiply accumulate // Floating point reciprocal step def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", + "^FN?ML[AS]_ZPZZZ_[HSD]", "^FML[AS]_ZZZI_[HSD]$", "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 
@@ -1624,7 +1639,7 @@ def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; // Floating point reciprocal exponent -def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>; // Floating point reduction, F16 def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; @@ -1636,22 +1651,22 @@ def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$ def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; // Floating point round to integral, F16 -def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; +def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; // Floating point round to integral, F32 -def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; +def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; // Floating point round to integral, F64 -def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>; +def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>; +def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>; +def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 807ce40bc5eac1..f10b94523d2e03 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ 
b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -2567,13 +2567,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>; +def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>; +def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>; +def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric exponentiation def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp index 9d8633353e1f9f..6098d4e6239251 100644 --- a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp @@ -107,6 +107,10 @@ TEST(AArch64SVESchedPseudoTesta510, IsCorrect) { runSVEPseudoTestForCPU("cortex-a510"); } +TEST(AArch64SVESchedPseudoTestv1, IsCorrect) { + runSVEPseudoTestForCPU("neoverse-v1"); +} + TEST(AArch64SVESchedPseudoTestv2, IsCorrect) { runSVEPseudoTestForCPU("neoverse-v2"); } From 2d539db246fd9d26201255b84d04dacf2782eddf Mon Sep 17 00:00:00 2001 From: martinboehme Date: Fri, 8 Mar 2024 08:19:02 +0100 Subject: [PATCH 155/158] [clang][dataflow] When analyzing ctors, don't initialize fields of `*this` with values. (#84164) This is the constructor's job, and we want to be able to test that it does this. 
--- .../FlowSensitive/DataflowEnvironment.h | 5 ++ .../FlowSensitive/DataflowEnvironment.cpp | 21 ++++++- .../TypeErasedDataflowAnalysis.cpp | 2 +- .../Analysis/FlowSensitive/TestingSupport.cpp | 5 +- .../Analysis/FlowSensitive/TransferTest.cpp | 63 +++++++++++++++++++ 5 files changed, 92 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index e8f009ef6c7913..2330697299fdd7 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -445,6 +445,11 @@ class Environment { return createObjectInternal(&D, D.getType(), InitExpr); } + /// Initializes the fields (including synthetic fields) of `Loc` with values, + /// unless values of the field type are not supported or we hit one of the + /// limits at which we stop producing values. + void initializeFieldsWithValues(RecordStorageLocation &Loc); + /// Assigns `Val` as the value of `Loc` in the environment. 
void setValue(const StorageLocation &Loc, Value &Val); diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 62332a18c44a4a..1d2bd9a9b08af3 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -432,8 +432,15 @@ void Environment::initialize() { } } else if (MethodDecl->isImplicitObjectMemberFunction()) { QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); - setThisPointeeStorageLocation( - cast(createObject(ThisPointeeType))); + auto &ThisLoc = + cast(createStorageLocation(ThisPointeeType)); + setThisPointeeStorageLocation(ThisLoc); + refreshRecordValue(ThisLoc, *this); + // Initialize fields of `*this` with values, but only if we're not + // analyzing a constructor; after all, it's the constructor's job to do + // this (and we want to be able to test that). + if (!isa(MethodDecl)) + initializeFieldsWithValues(ThisLoc); } } } @@ -819,6 +826,16 @@ PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc) { + llvm::DenseSet Visited; + int CreatedValuesCount = 0; + initializeFieldsWithValues(Loc, Visited, 0, CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " + << Loc.getType() << '\n'; + } +} + void Environment::setValue(const StorageLocation &Loc, Value &Val) { assert(!isa(&Val) || &cast(&Val)->getLoc() == &Loc); diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index a9f39e153d0ce1..939247c047c66e 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -406,7 
+406,6 @@ builtinTransferInitializer(const CFGInitializer &Elt, } } assert(Member != nullptr); - assert(MemberLoc != nullptr); // FIXME: Instead of these case distinctions, we would ideally want to be able // to simply use `Environment::createObject()` here, the same way that we do @@ -422,6 +421,7 @@ builtinTransferInitializer(const CFGInitializer &Elt, ParentLoc->setChild(*Member, InitExprLoc); } else if (auto *InitExprVal = Env.getValue(*InitExpr)) { + assert(MemberLoc != nullptr); if (Member->getType()->isRecordType()) { auto *InitValStruct = cast(InitExprVal); // FIXME: Rather than performing a copy here, we should really be diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp index 09f5524e152c9f..5c4d42c6ccdcf8 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp @@ -179,7 +179,10 @@ llvm::Error test::checkDataflowWithNoopAnalysis( // -fnodelayed-template-parsing is the default everywhere but on Windows. // Set it explicitly so that tests behave the same on Windows as on other // platforms. - "-fsyntax-only", "-fno-delayed-template-parsing", + // Set -Wno-unused-value because it's often desirable in tests to write + // expressions with unused value, and we don't want the output to be + // cluttered with warnings about them. 
+ "-fsyntax-only", "-fno-delayed-template-parsing", "-Wno-unused-value", "-std=" + std::string(LangStandard::getLangStandardForKind(Std).getName())}; AnalysisInputs AI( diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index f534ccb1254701..9fde4179db1c49 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1476,6 +1476,69 @@ TEST(TransferTest, BaseClassInitializer) { llvm::Succeeded()); } +TEST(TransferTest, FieldsDontHaveValuesInConstructor) { + // In a constructor, unlike in regular member functions, we don't want fields + // to be pre-initialized with values, because doing so is the job of the + // constructor. + std::string Code = R"( + struct target { + target() { + 0; + // [[p]] + // Mention the field so it is modeled; + Val; + } + + int Val; + }; + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", + ASTCtx, Env), + nullptr); + }); +} + +TEST(TransferTest, FieldsDontHaveValuesInConstructorWithBaseClass) { + // See above, but for a class with a base class. + std::string Code = R"( + struct Base { + int BaseVal; + }; + + struct target : public Base { + target() { + 0; + // [[p]] + // Mention the fields so they are modeled. + BaseVal; + Val; + } + + int Val; + }; + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + // FIXME: The field of the base class should already have been + // initialized with a value by the base constructor. This test documents + // the current buggy behavior. 
+ EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", + ASTCtx, Env), + nullptr); + EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", + ASTCtx, Env), + nullptr); + }); +} + TEST(TransferTest, StructModeledFieldsWithAccessor) { std::string Code = R"( class S { From 9b74c43d70f4b39d6fea7b542d77f2b652e4d651 Mon Sep 17 00:00:00 2001 From: martinboehme Date: Fri, 8 Mar 2024 08:19:41 +0100 Subject: [PATCH 156/158] [clang][dataflow] Add context-sensitive test for returning a record by value. (#84317) I'm making some changes to `Environment::getResultObjectLocation()`, with the ultimate goal of eliminating `RecordValue` entirely, and I'd like to make sure I don't break this behavior (and I've realized we don't have a test for it yet). --- .../Analysis/FlowSensitive/TransferTest.cpp | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9fde4179db1c49..a8c282f140b4cd 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -5735,6 +5735,39 @@ TEST(TransferTest, ContextSensitiveReturnInt) { {BuiltinOptions{ContextSensitiveOptions{}}}); } +TEST(TransferTest, ContextSensitiveReturnRecord) { + std::string Code = R"( + struct S { + bool B; + }; + + S makeS(bool BVal) { return {BVal}; } + + void target() { + S FalseS = makeS(false); + S TrueS = makeS(true); + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &FalseSLoc = + getLocForDecl(ASTCtx, Env, "FalseS"); + auto &TrueSLoc = + getLocForDecl(ASTCtx, Env, "TrueS"); + + EXPECT_EQ(getFieldValue(&FalseSLoc, "B", ASTCtx, Env), + &Env.getBoolLiteralValue(false)); + EXPECT_EQ(getFieldValue(&TrueSLoc, "B", ASTCtx, Env), + &Env.getBoolLiteralValue(true)); + }, 
+ {BuiltinOptions{ContextSensitiveOptions{}}}); +} + TEST(TransferTest, ContextSensitiveMethodLiteral) { std::string Code = R"( class MyClass { From bfa6444a332f82843f9fa44821d68fcc772e0272 Mon Sep 17 00:00:00 2001 From: Alexander Richardson Date: Thu, 7 Mar 2024 23:24:59 -0800 Subject: [PATCH 157/158] [compiler-rt] Fix incorrect usages of check_cxx_compiler_flag These checks have been broken since 6afe972195454a1110ed8d20c6f2a547e6366379. The check_cxx_compiler_flag macro only takes two arguments and passing three to it ends up calling `cmake_check_compiler_flag(CXX "${_FLAG}" ${_RESULT})` with ${_FLAG} equal to `-Werror` and the result variable being the actually tested compiler flag. I noticed this because some of the flags that I know should be supported were being flagged as not supported. `--debug-trycompile` shows the following surprising line in the generated CMakeLists.txt: `add_definitions([==[-D-Wempty-body]==] [==[-Werror]==])` which then results in the following error while running the check: ``` FAILED: CMakeFiles/cmTC_72736.dir/src.cxx.o tmp/upstream-llvm-readonly/bin/clang++ -nodefaultlibs -std=c++17 -fcolor-diagnostics -D-Wempty-body -Werror -MD -MT CMakeFiles/cmTC_72736.dir/src.cxx.o -MF CMakeFiles/cmTC_72736.dir/src.cxx.o.d -o CMakeFiles/cmTC_72736.dir/src.cxx.o -c .../cmake-build-all-sanitizers/CMakeFiles/CMakeScratch/TryCompile-nyh3QR/src.cxx In file included from :450: :1:9: error: macro name must be an identifier 1 | #define -Wempty-body 1 | ^ 1 error generated. ``` It would be great if CMake could be a bit more helpful here so I've filed https://gitlab.kitware.com/cmake/cmake/-/issues/25735. See also https://reviews.llvm.org/D146920. 
Reviewed By: nikic Pull Request: https://github.com/llvm/llvm-project/pull/83779 --- compiler-rt/cmake/config-ix.cmake | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 2ca18ebb4ad489..4f47142850a55e 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -103,7 +103,7 @@ check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG) check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG) check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG) check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG) -check_cxx_compiler_flag(-Werror -ftrivial-auto-var-init=pattern COMPILER_RT_HAS_TRIVIAL_AUTO_INIT) +check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT) if(NOT WIN32 AND NOT CYGWIN) # MinGW warns if -fvisibility-inlines-hidden is used. 
@@ -150,21 +150,21 @@ check_cxx_compiler_flag(/wd4391 COMPILER_RT_HAS_WD4391_FLAG) check_cxx_compiler_flag(/wd4722 COMPILER_RT_HAS_WD4722_FLAG) check_cxx_compiler_flag(/wd4800 COMPILER_RT_HAS_WD4800_FLAG) -check_cxx_compiler_flag(-Werror -Warray-bounds COMPILER_RT_HAS_ARRAY_BOUNDS_FLAG) -check_cxx_compiler_flag(-Werror -Wuninitialized COMPILER_RT_HAS_UNINITIALIZED_FLAG) -check_cxx_compiler_flag(-Werror -Wshadow COMPILER_RT_HAS_SHADOW_FLAG) -check_cxx_compiler_flag(-Werror -Wempty-body COMPILER_RT_HAS_EMPTY_BODY_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-pointer-memaccess COMPILER_RT_HAS_SIZEOF_POINTER_MEMACCESS_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-argument COMPILER_RT_HAS_SIZEOF_ARRAY_ARGUMENT_FLAG) -check_cxx_compiler_flag(-Werror -Wsuspicious-memaccess COMPILER_RT_HAS_SUSPICIOUS_MEMACCESS_FLAG) -check_cxx_compiler_flag(-Werror -Wbuiltin-memcpy-chk-size COMPILER_RT_HAS_BUILTIN_MEMCPY_CHK_SIZE_FLAG) -check_cxx_compiler_flag(-Werror -Warray-bounds-pointer-arithmetic COMPILER_RT_HAS_ARRAY_BOUNDS_POINTER_ARITHMETIC_FLAG) -check_cxx_compiler_flag(-Werror -Wreturn-stack-address COMPILER_RT_HAS_RETURN_STACK_ADDRESS_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-decay COMPILER_RT_HAS_SIZEOF_ARRAY_DECAY_FLAG) -check_cxx_compiler_flag(-Werror -Wformat-insufficient-args COMPILER_RT_HAS_FORMAT_INSUFFICIENT_ARGS_FLAG) -check_cxx_compiler_flag(-Werror -Wformat-security COMPILER_RT_HAS_BUILTIN_FORMAL_SECURITY_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-div COMPILER_RT_HAS_SIZEOF_ARRAY_DIV_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-pointer-div COMPILER_RT_HAS_SIZEOF_POINTER_DIV_FLAG) +check_cxx_compiler_flag("-Werror -Warray-bounds" COMPILER_RT_HAS_ARRAY_BOUNDS_FLAG) +check_cxx_compiler_flag("-Werror -Wuninitialized" COMPILER_RT_HAS_UNINITIALIZED_FLAG) +check_cxx_compiler_flag("-Werror -Wshadow" COMPILER_RT_HAS_SHADOW_FLAG) +check_cxx_compiler_flag("-Werror -Wempty-body" COMPILER_RT_HAS_EMPTY_BODY_FLAG) +check_cxx_compiler_flag("-Werror 
-Wsizeof-pointer-memaccess" COMPILER_RT_HAS_SIZEOF_POINTER_MEMACCESS_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-argument" COMPILER_RT_HAS_SIZEOF_ARRAY_ARGUMENT_FLAG) +check_cxx_compiler_flag("-Werror -Wsuspicious-memaccess" COMPILER_RT_HAS_SUSPICIOUS_MEMACCESS_FLAG) +check_cxx_compiler_flag("-Werror -Wbuiltin-memcpy-chk-size" COMPILER_RT_HAS_BUILTIN_MEMCPY_CHK_SIZE_FLAG) +check_cxx_compiler_flag("-Werror -Warray-bounds-pointer-arithmetic" COMPILER_RT_HAS_ARRAY_BOUNDS_POINTER_ARITHMETIC_FLAG) +check_cxx_compiler_flag("-Werror -Wreturn-stack-address" COMPILER_RT_HAS_RETURN_STACK_ADDRESS_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-decay" COMPILER_RT_HAS_SIZEOF_ARRAY_DECAY_FLAG) +check_cxx_compiler_flag("-Werror -Wformat-insufficient-args" COMPILER_RT_HAS_FORMAT_INSUFFICIENT_ARGS_FLAG) +check_cxx_compiler_flag("-Werror -Wformat-security" COMPILER_RT_HAS_BUILTIN_FORMAL_SECURITY_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-div" COMPILER_RT_HAS_SIZEOF_ARRAY_DIV_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-pointer-div" COMPILER_RT_HAS_SIZEOF_POINTER_DIV_FLAG) # Symbols. check_symbol_exists(__func__ "" COMPILER_RT_HAS_FUNC_SYMBOL) From 88698cce42bc120d8b88f8592889febc0a32cfde Mon Sep 17 00:00:00 2001 From: Jose Lopes Date: Wed, 7 Aug 2024 16:56:05 +0100 Subject: [PATCH 158/158] Fix merge problem --- mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index 07fb017e8e96a1..a729bc99b987cd 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -148,7 +148,7 @@ std::unique_ptr createBufferLoopHoistingPass(); // Options struct for BufferResultsToOutParams pass. // Note: defined only here, not in tablegen. 
-struct BufferResultsToOutParamsOptions { +struct BufferResultsToOutParamsOpts { /// Memcpy function: Generate a memcpy between two memrefs. using MemCpyFn = std::function;