From d9fd677d792c6d13da87ea3f49ed9e8cf6cc95b1 Mon Sep 17 00:00:00 2001
From: Adam Siemieniuk
Date: Thu, 25 Apr 2024 10:43:55 +0200
Subject: [PATCH] Bump LLVM (#907)

Also retires more of the member-style cast method variants, such as
x.dyn_cast<T>(), in favor of the free casting functions, as sketched below.
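As a rough, illustrative sketch of that migration (castStyles and someValue
are made-up names for this sketch, not code from this patch):

  // Sketch: member-style casts vs. the free casting functions.
  #include "mlir/IR/BuiltinTypes.h"
  #include "mlir/IR/Value.h"

  using namespace mlir;

  void castStyles(Value someValue) {
    Type type = someValue.getType();
    // Before (member casts, retired upstream):
    //   auto memref = type.dyn_cast<MemRefType>();
    //   auto shaped = type.dyn_cast_or_null<ShapedType>();
    // After (free functions, as used throughout this patch):
    auto memref = dyn_cast<MemRefType>(type);         // null if not a memref
    auto shaped = dyn_cast_or_null<ShapedType>(type); // also accepts null Type
    if (memref && shaped && isa<FloatType>(shaped.getElementType())) {
      // ... use the cast results exactly as before ...
    }
  }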
It also updates the omp tests for the split loop representation, where the
loop bounds and steps now sit on an omp.loop_nest op nested inside omp.wsloop,
and fixes the LLVM build check target (check-tpp becomes check-all) after the
refactor.
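The retired OpFoldResult accessors migrate the same way; the offsets, sizes,
and strides loops in the TransformUtils.cpp hunks below all follow this shape
(remapValues is a hypothetical helper, not part of the patch):

  // Sketch of the OpFoldResult pattern; illustrative only.
  #include "mlir/IR/IRMapping.h"
  #include "mlir/IR/OpDefinition.h"

  using namespace mlir;

  static SmallVector<OpFoldResult> remapValues(ArrayRef<OpFoldResult> mixed,
                                               IRMapping &mapping) {
    SmallVector<OpFoldResult> result;
    for (OpFoldResult ofr : mixed) {
      // An OpFoldResult holds either a dynamic Value or a static Attribute.
      if (auto value = dyn_cast<Value>(ofr))
        result.push_back(mapping.lookupOrDefault(value)); // remap SSA values
      else
        result.push_back(ofr); // static attributes pass through unchanged
    }
    return result;
  }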
---
 build_tools/llvm_version.txt                    |  2 +-
 include/TPP/IR/StructuredOpMatcher.h            |  4 +-
 .../ConvertXsmmToFunc/ConvertXsmmToFunc.cpp     |  4 +-
 lib/TPP/GPU/GpuVulkanAbi.cpp                    |  4 +-
 lib/TPP/Transforms/TransformUtils.cpp           |  6 +--
 lib/TPP/Transforms/Utils/ValueUtils.cpp         |  2 +-
 scripts/buildkite/build_llvm.sh                 |  2 +-
 .../pass-convert-gemm-to-parallel-tile.mlir     | 13 +++---
 .../pass-convert-mlp-to-parallel-tile.mlir      | 41 ++++++++++---------
 tools/tpp-run/MLIRBench.cpp                     |  4 +-
 10 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/build_tools/llvm_version.txt b/build_tools/llvm_version.txt
index 31885f70d..dcc4a5864 100644
--- a/build_tools/llvm_version.txt
+++ b/build_tools/llvm_version.txt
@@ -1 +1 @@
-4e0b8eae4cb4328f98e6b748c31050a704d378f6
+fe47e8ff3ae7fc8975eaade6bfa6679737c28b93
diff --git a/include/TPP/IR/StructuredOpMatcher.h b/include/TPP/IR/StructuredOpMatcher.h
index f9bc9e6ee..7b68e5010 100644
--- a/include/TPP/IR/StructuredOpMatcher.h
+++ b/include/TPP/IR/StructuredOpMatcher.h
@@ -166,7 +166,7 @@ struct HasStaticShape {

   bool operator()(OpOperand *operand, Operation *op) const {
     auto operandType = operand->get().getType();
-    if (auto shapedType = operandType.dyn_cast_or_null<ShapedType>()) {
+    if (auto shapedType = dyn_cast_or_null<ShapedType>(operandType)) {
       if (!shapedType.hasStaticShape())
         return false;
       if (shape) {
@@ -188,7 +188,7 @@ struct HasStaticStrides {
   bool operator()(OpOperand *operand, Operation *op) const {
     auto operandType = operand->get().getType();
     SmallVector<int64_t> strides;
-    if (auto memRefType = operandType.dyn_cast_or_null<MemRefType>()) {
+    if (auto memRefType = dyn_cast_or_null<MemRefType>(operandType)) {
       int64_t offset;
       if (failed(getStridesAndOffset(memRefType, strides, offset)))
         return false;
diff --git a/lib/TPP/Conversion/ConvertXsmmToFunc/ConvertXsmmToFunc.cpp b/lib/TPP/Conversion/ConvertXsmmToFunc/ConvertXsmmToFunc.cpp
index 84183c3f8..208e3f0ad 100644
--- a/lib/TPP/Conversion/ConvertXsmmToFunc/ConvertXsmmToFunc.cpp
+++ b/lib/TPP/Conversion/ConvertXsmmToFunc/ConvertXsmmToFunc.cpp
@@ -42,7 +42,7 @@ static SmallVector<Type> extractInvokeOperandTypes(OpBuilder &builder,
   results.push_back(integer64);
   for (Value operand : operands) {
     Type operandType = operand.getType();
-    if (auto memrefType = operandType.dyn_cast<MemRefType>()) {
+    if (auto memrefType = dyn_cast<MemRefType>(operandType)) {
       // TODO: non-POD will require an LLVMTypeConverter.
       Type basePtrType = LLVM::LLVMPointerType::get(builder.getContext());
       results.push_back(basePtrType);
@@ -65,7 +65,7 @@ static SmallVector<Value> getOperands(OpBuilder &builder, Location loc,
       builder.create<arith::ConstantOp>(loc, integer64, dataTypeAttr));

   for (Value operand : operands) {
-    auto memrefType = operand.getType().dyn_cast<MemRefType>();
+    auto memrefType = dyn_cast<MemRefType>(operand.getType());
     if (!memrefType) {
       res.push_back(operand);
       continue;
diff --git a/lib/TPP/GPU/GpuVulkanAbi.cpp b/lib/TPP/GPU/GpuVulkanAbi.cpp
index 51ebfbaaa..fbdadb80e 100644
--- a/lib/TPP/GPU/GpuVulkanAbi.cpp
+++ b/lib/TPP/GPU/GpuVulkanAbi.cpp
@@ -60,7 +60,7 @@ static Type getVulkanTypeWrapper(Type type,
   assert(!isa<TensorType>(type) && "Tensors are not supported by Vulkan");

   // Buffers are already Vulkan compatible.
-  if (auto memrefType = type.dyn_cast<MemRefType>())
+  if (auto memrefType = dyn_cast<MemRefType>(type))
     return FlattenMemrefType(memrefType);

   // Index has to be converted to a fixed-size integer.
@@ -120,7 +120,7 @@ static Value FlattenMemrefOperand(Value operand, RewriterBase &rewriter) {
   auto loc = operand.getLoc();

   // Ignore non-memref types and 1D buffers.
-  auto memrefType = operand.getType().dyn_cast<MemRefType>();
+  auto memrefType = dyn_cast<MemRefType>(operand.getType());
   if (!memrefType || memrefType.getRank() <= 1)
     return operand;

diff --git a/lib/TPP/Transforms/TransformUtils.cpp b/lib/TPP/Transforms/TransformUtils.cpp
index 170b2b86e..c9e2fd0dd 100644
--- a/lib/TPP/Transforms/TransformUtils.cpp
+++ b/lib/TPP/Transforms/TransformUtils.cpp
@@ -410,21 +410,21 @@ struct ConvertToForAll : public OpRewritePattern<scf::ForOp> {
     Value destVal = mapping.lookup(insertSlice.getDest());
     SmallVector<OpFoldResult> offsets;
     for (OpFoldResult offset : insertSlice.getMixedOffsets()) {
-      if (auto valueOffset = offset.dyn_cast<Value>())
+      if (auto valueOffset = dyn_cast<Value>(offset))
         offsets.push_back(mapping.lookupOrDefault(valueOffset));
       else
         offsets.push_back(offset);
     }
     SmallVector<OpFoldResult> sizes;
     for (OpFoldResult size : insertSlice.getMixedSizes()) {
-      if (auto valueSize = size.dyn_cast<Value>())
+      if (auto valueSize = dyn_cast<Value>(size))
         sizes.push_back(mapping.lookupOrDefault(valueSize));
       else
         sizes.push_back(size);
     }
     SmallVector<OpFoldResult> strides;
     for (OpFoldResult stride : insertSlice.getMixedStrides()) {
-      if (auto valueStride = stride.dyn_cast<Value>())
+      if (auto valueStride = dyn_cast<Value>(stride))
         strides.push_back(mapping.lookupOrDefault(valueStride));
       else
         strides.push_back(stride);
diff --git a/lib/TPP/Transforms/Utils/ValueUtils.cpp b/lib/TPP/Transforms/Utils/ValueUtils.cpp
index 762fe912c..f76894591 100644
--- a/lib/TPP/Transforms/Utils/ValueUtils.cpp
+++ b/lib/TPP/Transforms/Utils/ValueUtils.cpp
@@ -118,7 +118,7 @@ FailureOr<SmallVector<int64_t>> getStaticStrides(Value value) {

 std::pair<Value, Value> getPtrAndOffset(OpBuilder &builder, Value operand,
                                         Location loc) {
-  auto memrefType = operand.getType().dyn_cast<MemRefType>();
+  auto memrefType = dyn_cast<MemRefType>(operand.getType());
   assert(memrefType && "Expect a memref value");
   MemRefType baseMemrefType = MemRefType::get({}, memrefType.getElementType());
   Type basePtrType = builder.getIndexType();
diff --git a/scripts/buildkite/build_llvm.sh b/scripts/buildkite/build_llvm.sh
index 8be41c8e2..3841648af 100755
--- a/scripts/buildkite/build_llvm.sh
+++ b/scripts/buildkite/build_llvm.sh
@@ -141,7 +141,7 @@ fi

 # Check LLVM
 echo "--- CHECK"
-echo_run ninja -C ${LLVM_BUILD_DIR} check-tpp
+echo_run ninja -C ${LLVM_BUILD_DIR} check-all
 if [ $? != 0 ]; then
   exit 1
 fi
diff --git a/test/Passes/pass-convert-gemm-to-parallel-tile.mlir b/test/Passes/pass-convert-gemm-to-parallel-tile.mlir
index e6f75a897..f9e132da4 100644
--- a/test/Passes/pass-convert-gemm-to-parallel-tile.mlir
+++ b/test/Passes/pass-convert-gemm-to-parallel-tile.mlir
@@ -28,10 +28,11 @@ module {
 // CHECK: %[[c0_i64:.*]] = arith.constant 0 : i64
 // CHECK: %[[temp0:.*]] = call @xsmm_brgemm_dispatch(%[[c1_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c1024_i64]], %[[c1024_i64]], %[[c0_i64]])
 // CHECK: omp.parallel {
-// CHECK: omp.wsloop for (%[[ARG3:.*]], %[[ARG4:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c8]]) {
-// CHECK: memref.alloca_scope {
-// CHECK: scf.for %[[ARG5:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
-// CHECK: %[[temp1:.*]] = arith.addi %[[ARG5]], %[[ARG3]] : index
-// CHECK: scf.for %[[ARG6:.*]] = %[[c0]] to %[[c8]] step %[[c1]] {
-// CHECK: %[[temp2:.*]] = arith.addi %[[ARG6]], %[[ARG4]] : index
+// CHECK: omp.wsloop {
+// CHECK: omp.loop_nest (%[[ARG3:.*]], %[[ARG4:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c8]]) {
+// CHECK: memref.alloca_scope {
+// CHECK: scf.for %[[ARG5:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
+// CHECK: %[[temp1:.*]] = arith.addi %[[ARG5]], %[[ARG3]] : index
+// CHECK: scf.for %[[ARG6:.*]] = %[[c0]] to %[[c8]] step %[[c1]] {
+// CHECK: %[[temp2:.*]] = arith.addi %[[ARG6]], %[[ARG4]] : index
diff --git a/test/Passes/pass-convert-mlp-to-parallel-tile.mlir b/test/Passes/pass-convert-mlp-to-parallel-tile.mlir
index 2227dc44f..09567de45 100644
--- a/test/Passes/pass-convert-mlp-to-parallel-tile.mlir
+++ b/test/Passes/pass-convert-mlp-to-parallel-tile.mlir
@@ -79,26 +79,27 @@ module {
 //CHECK: %[[c4_i64:.*]] = arith.constant 4 : i64
 //CHECK: %[[temp0:.*]] = call @xsmm_fused_brgemm_dispatch(%[[c1_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c32_i64]], %[[c1024_i64]], %[[c1024_i64]], %[[c0_i64]], %[[c0_i64]], %[[c5_i64]], %[[c4_i64]], %[[c1_i64]])
 //CHECK: omp.parallel {
-//CHECK: omp.wsloop for (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
-//CHECK: memref.alloca_scope {
-//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
-//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
-//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
-//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
+//CHECK: omp.wsloop {
+//CHECK: omp.loop_nest (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
+//CHECK: memref.alloca_scope {
+//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
+//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
+//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
+//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
 //CHECK: omp.parallel {
-//CHECK: omp.wsloop for (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
-//CHECK: memref.alloca_scope {
-//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
-//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
-//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
-//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
+//CHECK: omp.wsloop {
+//CHECK: omp.loop_nest (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
+//CHECK: memref.alloca_scope {
+//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
+//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
+//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
+//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
 //CHECK: omp.parallel {
-//CHECK: omp.wsloop for (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
-//CHECK: memref.alloca_scope {
-//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
-//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
-//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
-//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
-
-
+//CHECK: omp.wsloop {
+//CHECK: omp.loop_nest (%[[ARG10:.*]], %[[ARG11:.*]]) : index = (%[[c0]], %[[c0]]) to (%[[c8]], %[[c32]]) step (%[[c2]], %[[c16]]) {
+//CHECK: memref.alloca_scope {
+//CHECK: scf.for %[[ARG12:.*]] = %[[c0]] to %[[c2]] step %[[c1]] {
+//CHECK: %[[temp1:.*]] = arith.addi %[[ARG12]], %[[ARG10]] : index
+//CHECK: scf.for %[[ARG13:.*]] = %[[c0]] to %[[c16]] step %[[c1]] {
+//CHECK: %[[temp2:.*]] = arith.addi %[[ARG13]], %[[ARG11]] : index
diff --git a/tools/tpp-run/MLIRBench.cpp b/tools/tpp-run/MLIRBench.cpp
index 48802346b..406258edd 100644
--- a/tools/tpp-run/MLIRBench.cpp
+++ b/tools/tpp-run/MLIRBench.cpp
@@ -165,7 +165,7 @@ LogicalResult MLIRBench::replaceSplatWithRandom() {
     auto constant = dyn_cast<arith::ConstantOp>(op);
     if (!constant)
       continue;
-    auto cstType = constant.getType().dyn_cast<ShapedType>();
+    auto cstType = dyn_cast<ShapedType>(constant.getType());
     if (!cstType)
       continue;
     auto newAttr = replaceSplat(cstType, constant.getValueAttr());
@@ -318,7 +318,7 @@ void MLIRBench::printMean(Value mean) {

 void MLIRBench::printVector(Value vector) {
   auto op = vector;
-  auto vectorValue = vector.getType().dyn_cast<VectorType>();
+  auto vectorValue = dyn_cast<VectorType>(vector.getType());
   if (vectorValue.getElementType().isBF16()) {
     VectorType vecType =
         VectorType::get(vectorValue.getShape(), builder.getF32Type());