Xilinx · mgehre-amd · Dec 18, 2024 · Oct 1, 2024 · Oct 2, 2024 · Oct 2, 2024
diff --git a/build_tools/python_deploy/build_linux_packages.sh b/build_tools/python_deploy/build_linux_packages.sh
@@ -50,7 +50,7 @@ TM_PYTHON_VERSIONS="${TM_PYTHON_VERSIONS:-cp38-cp38 cp310-cp310 cp311-cp311}"
 # Location to store Release wheels
 TM_OUTPUT_DIR="${TM_OUTPUT_DIR:-${this_dir}/wheelhouse}"
 # What "packages to build"
-TM_PACKAGES="${TM_PACKAGES:-torch-mlir torch-mlir-core}"
+TM_PACKAGES="${TM_PACKAGES:-torch-mlir torch-mlir-ext}"
 # Use pre-built Pytorch
 TM_USE_PYTORCH_BINARY="${TM_USE_PYTORCH_BINARY:-ON}"
 # Skip running tests if you want quick iteration
@@ -83,12 +83,12 @@ function run_on_host() {
   fi
   mkdir -p "${TM_OUTPUT_DIR}"
   case "$package" in
-    torch-mlir)
+    torch-mlir-ext)
       TM_CURRENT_DOCKER_IMAGE=${TM_RELEASE_DOCKER_IMAGE}
       export USERID=0
       export GROUPID=0
       ;;
-    torch-mlir-core)
+    torch-mlir)
       TM_CURRENT_DOCKER_IMAGE=${TM_RELEASE_DOCKER_IMAGE}
       export USERID=0
       export GROUPID=0
@@ -158,22 +158,22 @@ function run_in_docker() {
       export PATH=$python_dir/bin:$orig_path
       echo ":::: Python version $(python3 --version)"
       case "$package" in
-        torch-mlir)
-          clean_wheels torch_mlir "$python_version"
-          build_torch_mlir "$TM_TORCH_VERSION"
+        torch-mlir-ext)
+          clean_wheels torch_mlir_ext "$python_version"
+          build_torch_mlir_ext "$TM_TORCH_VERSION"
 
           # Disable audit wheel until we can fix ODR torch issues.  See
           # https://github.com/llvm/torch-mlir/issues/1709
           #
-          #run_audit_wheel torch_mlir "$python_version"
+          #run_audit_wheel torch_mlir_ext "$python_version"
 
-          clean_build torch_mlir "$python_version"
+          clean_build torch_mlir_ext "$python_version"
           ;;
-        torch-mlir-core)
-          clean_wheels torch_mlir_core "$python_version"
-          build_torch_mlir_core
-          run_audit_wheel torch_mlir_core "$python_version"
-          clean_build torch_mlir_core "$python_version"
+        torch-mlir)
+          clean_wheels torch_mlir "$python_version"
+          build_torch_mlir
+          run_audit_wheel torch_mlir "$python_version"
+          clean_build torch_mlir "$python_version"
           ;;
         out-of-tree)
           setup_venv "$python_version" "$TM_TORCH_VERSION"
@@ -431,7 +431,7 @@ function clean_build() {
   rm -rf /main_checkout/torch-mlir/build /main_checkout/torch-mlir/llvm-build /main_checkout/torch-mlir/docker_venv  /main_checkout/torch-mlir/libtorch
 }
 
-function build_torch_mlir() {
+function build_torch_mlir_ext() {
   # Disable LTC build for releases
   export TORCH_MLIR_ENABLE_LTC=0
   local torch_version="$1"
@@ -470,7 +470,9 @@ function run_audit_wheel() {
   rm "$generic_wheel"
 }
 
-function build_torch_mlir_core() {
+function build_torch_mlir() {
+  # Disable LTC build for releases
+  export TORCH_MLIR_ENABLE_LTC=0
   python -m pip install --no-cache-dir -r /main_checkout/torch-mlir/build-requirements.txt
   CMAKE_GENERATOR=Ninja \
   TORCH_MLIR_PYTHON_PACKAGE_VERSION=${TORCH_MLIR_PYTHON_PACKAGE_VERSION} \

diff --git a/build_tools/python_deploy/build_macos_packages.sh b/build_tools/python_deploy/build_macos_packages.sh
@@ -56,16 +56,16 @@ function run() {
       export PATH=$python_dir/bin:$orig_path
       echo ":::: Python version $(python3 --version)"
       case "$package" in
+        torch-mlir-ext)
+          clean_wheels torch_mlir_ext "$python_version"
+          build_torch_mlir_ext torch_mlir_ext "$python_version"
+          run_audit_wheel torch_mlir_ext "$python_version"
+          ;;
         torch-mlir)
           clean_wheels torch_mlir "$python_version"
           build_torch_mlir torch_mlir "$python_version"
           run_audit_wheel torch_mlir "$python_version"
           ;;
-        torch-mlir-core)
-          clean_wheels torch_mlir_core "$python_version"
-          build_torch_mlir_core torch_mlir_core "$python_version"
-          run_audit_wheel torch_mlir_core "$python_version"
-          ;;
         *)
           echo "Unrecognized package '$package'"
           exit 1
@@ -75,7 +75,7 @@ function run() {
   done
 }
 
-function build_torch_mlir() {
+function build_torch_mlir_ext() {
   local wheel_basename="$1"
   local python_version="$2"
   rm -rf "$output_dir"/build_venv
@@ -93,7 +93,7 @@ function build_torch_mlir() {
   rm -rf "$output_dir"/build_venv
 }
 
-function build_torch_mlir_core() {
+function build_torch_mlir() {
   local wheel_basename="$1"
   local python_version="$2"
   rm -rf "$output_dir"/build_venv

diff --git a/docs/development.md b/docs/development.md
@@ -14,7 +14,7 @@ While this is running, you can already setup the Python venv and dependencies in
 ## Setup your Python VirtualEnvironment and Dependencies
 
 ```shell
-python -m venv mlir_venv
+python3 -m venv mlir_venv
 source mlir_venv/bin/activate
 # Some older pip installs may not be able to handle the recent PyTorch deps
 python -m pip install --upgrade pip

diff --git a/include/torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h b/include/torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h
@@ -338,6 +338,31 @@ struct OpBinder {
     return failure();
   }
 
+  ParseResult f32FloatArrayAttr(llvm::SmallVector<float> &values, // Binds attribute `torch.onnx.<nameSuffix>` as a list of f32 values, appended to `values`.
+                                StringRef nameSuffix, // attribute name without the `torch.onnx.` prefix
+                                ArrayRef<float> defaults) { // appended to `values` when the attribute is absent
+    SmallString<64> name("torch.onnx.");
+    name.append(nameSuffix);
+    auto attr = op->getAttr(name);
+    if (!attr) { // attribute missing: fall back to the caller-supplied defaults and succeed
+      values.append(defaults.begin(), defaults.end());
+      return success();
+    }
+    if (auto arrayAttr = dyn_cast<ArrayAttr>(attr)) {
+      for (auto element : arrayAttr) {
+        auto floatAttr = dyn_cast<FloatAttr>(element);
+        if (!floatAttr) // any non-float element makes the whole bind fail
+          return failure();
+        FloatType t = cast<FloatType>(floatAttr.getType());
+        if (t.getWidth() != 32) // only 32-bit float elements are accepted
+          return failure();
+        values.push_back(floatAttr.getValue().convertToFloat()); // note: elements pushed before a later failure stay in `values`
+      }
+      return success();
+    }
+    return failure(); // attribute exists but is not an ArrayAttr
+  }
+
   ParseResult stringArrayAttr(llvm::SmallVector<std::string> &values,
                               StringRef nameSuffix) {
     SmallString<64> name("torch.onnx.");

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -4521,6 +4521,7 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         llvm::SmallVector<int64_t> ngram_counts;
         llvm::SmallVector<int64_t> ngram_indexes;
         llvm::SmallVector<int64_t> pool_int64s;
+        llvm::SmallVector<float> weights;
         std::string mode;
         int64_t min_gram_length;
         int64_t max_gram_length;
@@ -4538,9 +4539,10 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
             binder.tensorOperand(input) || binder.tensorResultType(resultType))
           return failure();
 
-        if (mode != "TF")
-          return rewriter.notifyMatchFailure(binder.op,
-                                             "TF mode supported only");
+        llvm::SmallVector<float> defaultWeights(ngram_indexes.size(), 1.0f);
+        if (binder.f32FloatArrayAttr(weights, "weights", defaultWeights))
+          return failure();
+
         if (pool_int64s.size() == 0)
           return rewriter.notifyMatchFailure(
               binder.op, "pool_int64s empty, only integers supported");
@@ -4766,9 +4768,36 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
                     binder.getLoc(), loopConditionTrue, ValueRange({count}));
               }
               count = skipLoop.getResult(0);
-              // insert count "tf" into output
               Value countFloat = rewriter.create<Torch::AtenFloatScalarOp>(
                   binder.getLoc(), count);
+              if (mode == "IDF" || mode == "TFIDF") {
+                // both IDF and TFIDF modes use weights
+                float weight = weights[ngram_i];
+                Value constWeight = rewriter.create<Torch::ConstantFloatOp>(
+                    binder.getLoc(), rewriter.getF64FloatAttr(weight));
+
+                // TFIDF (default): multiply the raw count by the weight.
+                Value multiplier = countFloat;
+                if (mode == "IDF") {
+                  // All the counts larger than 1 would be truncated to 1
+                  // and the i-th element in weights would be used to scale
+                  // (by multiplication) the count of the i-th n-gram in pool.
+
+                  Value intCount = rewriter.create<Torch::AtenIntScalarOp>(
+                      binder.getLoc(), count);
+                  // compare intCount > 0
+                  Value gtZeroCount = rewriter.create<Torch::AtenGtIntOp>(
+                      binder.getLoc(), intCount, zero);
+                  gtZeroCount = rewriter.create<Torch::AtenIntBoolOp>(
+                      binder.getLoc(), gtZeroCount);
+                  Value gtZeroCountFloat =
+                      rewriter.create<Torch::AtenFloatScalarOp>(binder.getLoc(),
+                                                                gtZeroCount);
+                  multiplier = gtZeroCountFloat;
+                }
+                countFloat = rewriter.create<Torch::AtenMulFloatOp>(
+                    binder.getLoc(), multiplier, constWeight);
+              }
               Value dataList = rewriter.create<Torch::PrimListConstructOp>(
                   binder.getLoc(),
                   rewriter.getType<Torch::ListType>(

diff --git a/lib/Conversion/TorchOnnxToTorch/OnnxRecurrentLayerOpExpanders.cpp b/lib/Conversion/TorchOnnxToTorch/OnnxRecurrentLayerOpExpanders.cpp
@@ -1072,11 +1072,10 @@ LogicalResult OnnxGruExpander(OpBinder binder,
   Value cstNone = b.create<ConstantNoneOp>();
   Value cstZero = b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(0));
   Value cstOne = b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(1));
-  Value cstTwo = b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(2));
 
   // Binding arguments
   ValueTensorType yTy, Y_hType;
-  if (binder.tensorResultTypeAtIndex(yTy, 0) ||
+  if (binder.tensorResultTypeAtIndex(yTy, 0) &&
       binder.tensorResultTypeAtIndex(Y_hType, 1)) {
     return rewriter.notifyMatchFailure(binder.op,
                                        "At least one output must be present");
@@ -1132,6 +1131,7 @@ LogicalResult OnnxGruExpander(OpBinder binder,
   // Validations
   auto XShape = xTy.getSizes();
   int64_t batch_size = (layout == 0) ? XShape[1] : XShape[0];
+  int64_t seq_len = (layout == 0) ? XShape[0] : XShape[1];
   int64_t input_size = XShape[2];
 
   std::ostringstream oss;
@@ -1173,6 +1173,10 @@ LogicalResult OnnxGruExpander(OpBinder binder,
     Value cstDtype = getDtypeIntValueForType(rewriter, loc, xTy.getDtype());
     initial_h =
         b.create<AtenZerosOp>(hTy, hShape, cstDtype, cstNone, cstNone, cstNone);
+  } else {
+    if (layout == 1) {
+      initial_h = StaticTranspose(b, initial_h, 0, 1);
+    }
   }
 
   if (binder.tensorOperandAtIndex(sequence_lens, 4))
@@ -1192,10 +1196,10 @@ LogicalResult OnnxGruExpander(OpBinder binder,
   // fill in B
   Value cstXDtype = getDtypeIntValueForType(rewriter, loc, xTy.getDtype());
   if (B == nullptr) {
-    SmallVector<int64_t> BShape = {num_directions, 2 * hidden_size};
+    SmallVector<int64_t> BShape = {num_directions, 6 * hidden_size};
     SmallVector<Value> BShapeListContents = {
         b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(num_directions)),
-        b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(2 * hidden_size))};
+        b.create<ConstantIntOp>(intType, b.getI64IntegerAttr(6 * hidden_size))};
     Value BShapeList = b.create<PrimListConstructOp>(
         b.getType<ListType>(intType), BShapeListContents);
     auto BType = b.getType<ValueTensorType>(BShape, wTy.getDtype());
@@ -1256,51 +1260,47 @@ LogicalResult OnnxGruExpander(OpBinder binder,
                       B_slices[4], B_slices[5]);
 
   // Process inputs based on layout
-  Value X_processed, initial_h_processed;
-  ValueTensorType yTy_processed, Y_hType_processed;
-
-  if (layout == 0) {
-    X_processed = X;
-    initial_h_processed = initial_h_forward;
-    yTy_processed = yTy;
-    Y_hType_processed = Y_hType;
-  } else {
-    X_processed = b.create<AtenTransposeIntOp>(X.getType(), X, cstZero, cstOne);
-    initial_h_processed = b.create<AtenTransposeIntOp>(
-        initial_h.getType(), initial_h_forward, cstZero, cstOne);
-
-    auto yTySizes = yTy.getSizes();
-    auto Y_hTypeSizes = Y_hType.getSizes();
-
-    yTy_processed = b.getType<ValueTensorType>(
-        llvm::SmallVector<int64_t>{yTySizes[1], yTySizes[0], yTySizes[2],
-                                   yTySizes[3]},
-        yTy.getDtype());
-
-    Y_hType_processed = b.getType<ValueTensorType>(
-        llvm::SmallVector<int64_t>{Y_hTypeSizes[1], Y_hTypeSizes[0],
-                                   Y_hTypeSizes[2]},
-        Y_hType.getDtype());
+  if (layout == 1) {
+    X = StaticTranspose(b, X, 0, 1);
   }
 
   // Weights and biases ready. Calling GRU layer to insert the actual ops.
-  GruLayerOutput gruLayerOutput =
-      gru_layer(b, X_processed, initial_h_processed, weights, activations,
-                linear_before_reset);
+  GruLayerOutput gruLayerOutput = gru_layer(b, X, initial_h_forward, weights,
+                                            activations, linear_before_reset);
 
   // Process outputs based on layout
-  Value Y_final, Y_h_final;
-  if (layout == 0) {
-    Y_final = b.create<AtenUnsqueezeOp>(yTy, gruLayerOutput.Y, cstOne);
-    Y_h_final = b.create<AtenUnsqueezeOp>(Y_hType, gruLayerOutput.Y_h, cstZero);
+  Value Y_final;
+  if (binder.tensorResultTypeAtIndex(yTy, 0)) {
+    Y_final = cstNone;
   } else {
-    auto Y_transposed = b.create<AtenTransposeIntOp>(
-        gruLayerOutput.Y.getType(), gruLayerOutput.Y, cstZero, cstOne);
-    Y_final = b.create<AtenUnsqueezeOp>(yTy, Y_transposed, cstTwo);
+    if (layout == 0) {
+      Y_final = b.create<AtenUnsqueezeOp>(yTy, gruLayerOutput.Y, cstOne);
+    } else {
+      Type yTy_original = b.getType<ValueTensorType>(
+          llvm::SmallVector<int64_t>{seq_len, 1, batch_size, hidden_size},
+          yTy.getDtype());
+      Y_final =
+          b.create<AtenUnsqueezeOp>(yTy_original, gruLayerOutput.Y, cstOne);
+      Y_final = StaticTranspose(b, Y_final, 1, 2);
+      Y_final = StaticTranspose(b, Y_final, 0, 1);
+    }
+  }
 
-    auto Y_h_transposed = b.create<AtenTransposeIntOp>(
-        gruLayerOutput.Y_h.getType(), gruLayerOutput.Y_h, cstZero, cstOne);
-    Y_h_final = b.create<AtenUnsqueezeOp>(Y_hType, Y_h_transposed, cstZero);
+  Value Y_h_final;
+  if (binder.tensorResultTypeAtIndex(Y_hType, 1)) {
+    Y_h_final = cstNone;
+  } else {
+    if (layout == 0) {
+      Y_h_final =
+          b.create<AtenUnsqueezeOp>(Y_hType, gruLayerOutput.Y_h, cstZero);
+    } else {
+      Type y_hTy_original = b.getType<ValueTensorType>(
+          llvm::SmallVector<int64_t>{1, batch_size, hidden_size},
+          Y_hType.getDtype());
+      Y_h_final = b.create<AtenUnsqueezeOp>(y_hTy_original, gruLayerOutput.Y_h,
+                                            cstZero);
+      Y_h_final = StaticTranspose(b, Y_h_final, 0, 1);
+    }
   }
 
   rewriter.replaceOp(binder.op, mlir::ValueRange{Y_final, Y_h_final});

diff --git a/lib/Dialect/Torch/IR/TorchOps.cpp b/lib/Dialect/Torch/IR/TorchOps.cpp
@@ -5489,8 +5489,11 @@ void BindSymbolicShapeOp::print(OpAsmPrinter &p) {
 }
 
 LogicalResult BindSymbolicShapeOp::verify() {
-  if (getShapeSymbols().empty())
-    return emitOpError() << "requires non-empty shapeSymbols";
+  if (getShapeSymbols().size() !=
+      getShapeExpressions().getValue().getNumSymbols())
+    return emitOpError()
+           << "requires equal number of shape symbol args and symbol args to "
+              "the attached affine map, since they are 1:1 mapped";
 
   for (auto symbol : getShapeSymbols()) {
     Operation *definingOp = symbol.getDefiningOp();

diff --git a/setup.py b/setup.py
@@ -223,13 +223,13 @@ def build_extension(self, ext):
 EXT_MODULES = [
     CMakeExtension("torch_mlir._mlir_libs._torchMlir"),
 ]
-NAME = "torch-mlir-core"
+NAME = "torch-mlir"
 
 # If building PyTorch extensions, customize.
 if not TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS:
     import torch
 
-    NAME = "torch-mlir"
+    NAME = "torch-mlir-ext"
     INSTALL_REQUIRES.extend(
         [
             f"torch=={torch.__version__}".split("+", 1)[0],