From 38ccee00346300c87abc34860398bc950c65eaec Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:43:51 -0400 Subject: [PATCH 01/38] [WASM] Fix for wasi libc build break: add tan to RuntimeLibcallSignatureTable (#95082) The wasm backend fetches the tan runtime lib call in `llvm/include/llvm/IR/RuntimeLibcalls.def` via `StaticLibcallNameMap()`, but ignores the runtime function because a function signature mapping is not specified in RuntimeLibcallSignatureTable(). The fix is to specify the function signatures for f32, f64, and f128. This is a fix for a build break reported on PR https://github.com/llvm/llvm-project/pull/94559#issuecomment-2159923215. --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 3 + llvm/test/CodeGen/WebAssembly/libcalls.ll | 69 ++++++++++--------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index d9936557776ba1..20e50c8c9e1ae0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -201,6 +201,9 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::COS_F32] = f32_func_f32; Table[RTLIB::COS_F64] = f64_func_f64; Table[RTLIB::COS_F128] = i64_i64_func_i64_i64; + Table[RTLIB::TAN_F32] = f32_func_f32; + Table[RTLIB::TAN_F64] = f64_func_f64; + Table[RTLIB::TAN_F128] = i64_i64_func_i64_i64; Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR; Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR; Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR; diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll index 4f57c347a1a335..70f000664d388a 100644 --- a/llvm/test/CodeGen/WebAssembly/libcalls.ll +++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll @@ -12,6 +12,7 @@ declare fp128 @llvm.nearbyint.f128(fp128) declare fp128 @llvm.pow.f128(fp128, fp128) declare fp128 @llvm.powi.f128.i32(fp128, i32) +declare double @llvm.tan.f64(double) declare double @llvm.cos.f64(double) declare double @llvm.log10.f64(double) declare double @llvm.pow.f64(double, double) @@ -240,42 +241,44 @@ define double @f64libcalls(double %x, double %y, i32 %z) { ; CHECK: .functype f64libcalls (f64, f64, i32) -> (f64) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: global.get $push11=, __stack_pointer -; CHECK-NEXT: i32.const $push12=, 16 -; CHECK-NEXT: i32.sub $push18=, $pop11, $pop12 -; CHECK-NEXT: local.tee $push17=, 3, $pop18 -; CHECK-NEXT: global.set __stack_pointer, $pop17 -; CHECK-NEXT: local.get $push22=, 0 -; CHECK-NEXT: local.get $push19=, 0 -; CHECK-NEXT: call $push0=, cos, $pop19 -; CHECK-NEXT: call $push1=, log10, $pop0 -; CHECK-NEXT: local.get $push20=, 1 -; CHECK-NEXT: call $push2=, pow, $pop1, $pop20 -; CHECK-NEXT: local.get $push21=, 2 -; CHECK-NEXT: call $push3=, __powidf2, $pop2, $pop21 -; CHECK-NEXT: call $push4=, log, $pop3 -; CHECK-NEXT: call $push5=, exp, $pop4 -; CHECK-NEXT: call $push6=, exp10, $pop5 -; CHECK-NEXT: call $push7=, cbrt, $pop6 -; CHECK-NEXT: call $push8=, lround, $pop7 -; CHECK-NEXT: call $push9=, ldexp, $pop22, $pop8 -; CHECK-NEXT: local.get $push23=, 3 -; CHECK-NEXT: i32.const $push15=, 12 -; CHECK-NEXT: i32.add $push16=, $pop23, $pop15 -; CHECK-NEXT: call $push24=, frexp, $pop9, $pop16 -; CHECK-NEXT: local.set 0, $pop24 -; CHECK-NEXT: local.get $push25=, 3 -; CHECK-NEXT: i32.load $push10=, 12($pop25) -; CHECK-NEXT: call 
escape_value, $pop10 -; CHECK-NEXT: local.get $push26=, 3 +; CHECK-NEXT: global.get $push12=, __stack_pointer ; CHECK-NEXT: i32.const $push13=, 16 -; CHECK-NEXT: i32.add $push14=, $pop26, $pop13 -; CHECK-NEXT: global.set __stack_pointer, $pop14 -; CHECK-NEXT: local.get $push27=, 0 -; CHECK-NEXT: return $pop27 +; CHECK-NEXT: i32.sub $push19=, $pop12, $pop13 +; CHECK-NEXT: local.tee $push18=, 3, $pop19 +; CHECK-NEXT: global.set __stack_pointer, $pop18 +; CHECK-NEXT: local.get $push23=, 0 +; CHECK-NEXT: local.get $push20=, 0 +; CHECK-NEXT: call $push0=, tan, $pop20 +; CHECK-NEXT: call $push1=, cos, $pop0 +; CHECK-NEXT: call $push2=, log10, $pop1 +; CHECK-NEXT: local.get $push21=, 1 +; CHECK-NEXT: call $push3=, pow, $pop2, $pop21 +; CHECK-NEXT: local.get $push22=, 2 +; CHECK-NEXT: call $push4=, __powidf2, $pop3, $pop22 +; CHECK-NEXT: call $push5=, log, $pop4 +; CHECK-NEXT: call $push6=, exp, $pop5 +; CHECK-NEXT: call $push7=, exp10, $pop6 +; CHECK-NEXT: call $push8=, cbrt, $pop7 +; CHECK-NEXT: call $push9=, lround, $pop8 +; CHECK-NEXT: call $push10=, ldexp, $pop23, $pop9 +; CHECK-NEXT: local.get $push24=, 3 +; CHECK-NEXT: i32.const $push16=, 12 +; CHECK-NEXT: i32.add $push17=, $pop24, $pop16 +; CHECK-NEXT: call $push25=, frexp, $pop10, $pop17 +; CHECK-NEXT: local.set 0, $pop25 +; CHECK-NEXT: local.get $push26=, 3 +; CHECK-NEXT: i32.load $push11=, 12($pop26) +; CHECK-NEXT: call escape_value, $pop11 +; CHECK-NEXT: local.get $push27=, 3 +; CHECK-NEXT: i32.const $push14=, 16 +; CHECK-NEXT: i32.add $push15=, $pop27, $pop14 +; CHECK-NEXT: global.set __stack_pointer, $pop15 +; CHECK-NEXT: local.get $push28=, 0 +; CHECK-NEXT: return $pop28 - %a = call double @llvm.cos.f64(double %x) + %k = call double @llvm.tan.f64(double %x) + %a = call double @llvm.cos.f64(double %k) %b = call double @llvm.log10.f64(double %a) %c = call double @llvm.pow.f64(double %b, double %y) %d = call double @llvm.powi.f64.i32(double %c, i32 %z) From 18cf1cd92b554ba0b870c6a2223ea4d0d3c6dd21 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 11 Jun 2024 07:45:12 -0700 Subject: [PATCH 02/38] [mlir] Add PDL C & Python usage (#94714) Following a rather direct approach to expose PDL usage from C and then Python. This doesn't yet plumb through support for custom matchers via this interface, so it is constrained to the basics initially. This also exposes the greedy rewrite driver. Currently the only way to define patterns is via PDL (just to keep this change small). The creation of the PDL pattern module could be improved to avoid folks potentially accessing the module used to construct it post construction. No ergonomic work has been done yet. 
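For reference, a minimal sketch of the intended Python-side flow, mirroring the integration test added in this patch (the parse strings are placeholders, not part of the change):

    from mlir.ir import Context, Location, Module
    from mlir.rewrite import PDLModule, apply_patterns_and_fold_greedily

    with Context(), Location.unknown():
        # Module holding the pdl.pattern ops (placeholder IR).
        pdl_module = Module.parse("...")
        # Payload IR to be rewritten (placeholder IR).
        payload = Module.parse("...")
        # Freezing transfers ownership of pdl_module to the pattern set,
        # so pdl_module must not be touched afterwards.
        frozen = PDLModule(pdl_module).freeze()
        # The frozen set can be applied any number of times.
        apply_patterns_and_fold_greedily(payload, frozen)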
--------- Signed-off-by: Jacques Pienaar --- mlir/include/mlir-c/Bindings/Python/Interop.h | 21 ++++ mlir/include/mlir-c/Rewrite.h | 60 ++++++++++ .../mlir/Bindings/Python/PybindAdaptors.h | 21 ++++ mlir/lib/Bindings/Python/IRModule.h | 1 + mlir/lib/Bindings/Python/MainModule.cpp | 4 + mlir/lib/Bindings/Python/Rewrite.cpp | 110 ++++++++++++++++++ mlir/lib/Bindings/Python/Rewrite.h | 22 ++++ mlir/lib/CAPI/Transforms/CMakeLists.txt | 3 + mlir/lib/CAPI/Transforms/Rewrite.cpp | 83 +++++++++++++ mlir/python/CMakeLists.txt | 2 + mlir/python/mlir/dialects/pdl.py | 8 +- mlir/python/mlir/rewrite.py | 5 + mlir/test/python/integration/dialects/pdl.py | 67 +++++++++++ .../llvm-project-overlay/mlir/BUILD.bazel | 12 +- .../mlir/python/BUILD.bazel | 7 ++ 15 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 mlir/include/mlir-c/Rewrite.h create mode 100644 mlir/lib/Bindings/Python/Rewrite.cpp create mode 100644 mlir/lib/Bindings/Python/Rewrite.h create mode 100644 mlir/lib/CAPI/Transforms/Rewrite.cpp create mode 100644 mlir/python/mlir/rewrite.py create mode 100644 mlir/test/python/integration/dialects/pdl.py diff --git a/mlir/include/mlir-c/Bindings/Python/Interop.h b/mlir/include/mlir-c/Bindings/Python/Interop.h index 0a36e97c2ae683..a33190c380d37d 100644 --- a/mlir/include/mlir-c/Bindings/Python/Interop.h +++ b/mlir/include/mlir-c/Bindings/Python/Interop.h @@ -39,6 +39,7 @@ #include "mlir-c/IR.h" #include "mlir-c/IntegerSet.h" #include "mlir-c/Pass.h" +#include "mlir-c/Rewrite.h" // The 'mlir' Python package is relocatable and supports co-existing in multiple // projects. Each project must define its outer package prefix with this define @@ -284,6 +285,26 @@ static inline MlirModule mlirPythonCapsuleToModule(PyObject *capsule) { return module; } +/** Creates a capsule object encapsulating the raw C-API + * MlirFrozenRewritePatternSet. + * The returned capsule does not extend or affect ownership of any Python + * objects that reference the module in any way. */ +static inline PyObject * +mlirPythonFrozenRewritePatternSetToCapsule(MlirFrozenRewritePatternSet pm) { + return PyCapsule_New(MLIR_PYTHON_GET_WRAPPED_POINTER(pm), + MLIR_PYTHON_CAPSULE_PASS_MANAGER, NULL); +} + +/** Extracts an MlirFrozenRewritePatternSet from a capsule as produced from + * mlirPythonFrozenRewritePatternSetToCapsule. If the capsule is not of the + * right type, then a null module is returned. */ +static inline MlirFrozenRewritePatternSet +mlirPythonCapsuleToFrozenRewritePatternSet(PyObject *capsule) { + void *ptr = PyCapsule_GetPointer(capsule, MLIR_PYTHON_CAPSULE_PASS_MANAGER); + MlirFrozenRewritePatternSet pm = {ptr}; + return pm; +} + /** Creates a capsule object encapsulating the raw C-API MlirPassManager. * The returned capsule does not extend or affect ownership of any Python * objects that reference the module in any way. */ diff --git a/mlir/include/mlir-c/Rewrite.h b/mlir/include/mlir-c/Rewrite.h new file mode 100644 index 00000000000000..45218a1cd4ebd5 --- /dev/null +++ b/mlir/include/mlir-c/Rewrite.h @@ -0,0 +1,60 @@ +//===-- mlir-c/Rewrite.h - Helpers for C API to Rewrites ----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header declares the registration and creation method for +// rewrite patterns. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_C_REWRITE_H +#define MLIR_C_REWRITE_H + +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" +#include "mlir/Config/mlir-config.h" + +//===----------------------------------------------------------------------===// +/// Opaque type declarations (see mlir-c/IR.h for more details). +//===----------------------------------------------------------------------===// + +#define DEFINE_C_API_STRUCT(name, storage) \ + struct name { \ + storage *ptr; \ + }; \ + typedef struct name name + +DEFINE_C_API_STRUCT(MlirFrozenRewritePatternSet, void); +DEFINE_C_API_STRUCT(MlirGreedyRewriteDriverConfig, void); +DEFINE_C_API_STRUCT(MlirRewritePatternSet, void); + +MLIR_CAPI_EXPORTED MlirFrozenRewritePatternSet +mlirFreezeRewritePattern(MlirRewritePatternSet op); + +MLIR_CAPI_EXPORTED void +mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op); + +MLIR_CAPI_EXPORTED MlirLogicalResult mlirApplyPatternsAndFoldGreedily( + MlirModule op, MlirFrozenRewritePatternSet patterns, + MlirGreedyRewriteDriverConfig); + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +DEFINE_C_API_STRUCT(MlirPDLPatternModule, void); + +MLIR_CAPI_EXPORTED MlirPDLPatternModule +mlirPDLPatternModuleFromModule(MlirModule op); + +MLIR_CAPI_EXPORTED void mlirPDLPatternModuleDestroy(MlirPDLPatternModule op); + +MLIR_CAPI_EXPORTED MlirRewritePatternSet +mlirRewritePatternSetFromPDLPatternModule(MlirPDLPatternModule op); +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + +#undef DEFINE_C_API_STRUCT + +#endif // MLIR_C_REWRITE_H diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h index d8f22c7aa17096..ebf50109f72f23 100644 --- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h +++ b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h @@ -198,6 +198,27 @@ struct type_caster<MlirModule> { }; }; +/// Casts object <-> MlirFrozenRewritePatternSet. +template <> +struct type_caster<MlirFrozenRewritePatternSet> { + PYBIND11_TYPE_CASTER(MlirFrozenRewritePatternSet, + _("MlirFrozenRewritePatternSet")); + bool load(handle src, bool) { + py::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); + return value.ptr != nullptr; + } + static handle cast(MlirFrozenRewritePatternSet v, return_value_policy, + handle) { + py::object capsule = py::reinterpret_steal<py::object>( + mlirPythonFrozenRewritePatternSetToCapsule(v)); + return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("rewrite")) + .attr("FrozenRewritePatternSet") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + /// Casts object <-> MlirOperation. 
template <> struct type_caster<MlirOperation> { diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index 8c34c11f709501..f49efcd506ee91 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -22,6 +22,7 @@ #include "mlir-c/Diagnostics.h" #include "mlir-c/IR.h" #include "mlir-c/IntegerSet.h" +#include "mlir-c/Transforms.h" #include "mlir/Bindings/Python/PybindAdaptors.h" #include "llvm/ADT/DenseMap.h" diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp index 17272472ccca42..8da1ab16a4514b 100644 --- a/mlir/lib/Bindings/Python/MainModule.cpp +++ b/mlir/lib/Bindings/Python/MainModule.cpp @@ -11,6 +11,7 @@ #include "Globals.h" #include "IRModule.h" #include "Pass.h" +#include "Rewrite.h" namespace py = pybind11; using namespace mlir; @@ -116,6 +117,9 @@ PYBIND11_MODULE(_mlir, m) { populateIRInterfaces(irModule); populateIRTypes(irModule); + auto rewriteModule = m.def_submodule("rewrite", "MLIR Rewrite Bindings"); + populateRewriteSubmodule(rewriteModule); + // Define and populate PassManager submodule. auto passModule = m.def_submodule("passmanager", "MLIR Pass Management Bindings"); diff --git a/mlir/lib/Bindings/Python/Rewrite.cpp b/mlir/lib/Bindings/Python/Rewrite.cpp new file mode 100644 index 00000000000000..1d8128be9f0826 --- /dev/null +++ b/mlir/lib/Bindings/Python/Rewrite.cpp @@ -0,0 +1,110 @@ +//===- Rewrite.cpp - Rewrite ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Rewrite.h" + +#include "IRModule.h" +#include "mlir-c/Bindings/Python/Interop.h" +#include "mlir-c/Rewrite.h" +#include "mlir/Config/mlir-config.h" + +namespace py = pybind11; +using namespace mlir; +using namespace py::literals; +using namespace mlir::python; + +namespace { + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +/// Owning Wrapper around a PDLPatternModule. +class PyPDLPatternModule { +public: + PyPDLPatternModule(MlirPDLPatternModule module) : module(module) {} + PyPDLPatternModule(PyPDLPatternModule &&other) noexcept + : module(other.module) { + other.module.ptr = nullptr; + } + ~PyPDLPatternModule() { + if (module.ptr != nullptr) + mlirPDLPatternModuleDestroy(module); + } + MlirPDLPatternModule get() { return module; } + +private: + MlirPDLPatternModule module; +}; +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + +/// Owning Wrapper around a FrozenRewritePatternSet. 
+class PyFrozenRewritePatternSet { +public: + PyFrozenRewritePatternSet(MlirFrozenRewritePatternSet set) : set(set) {} + PyFrozenRewritePatternSet(PyFrozenRewritePatternSet &&other) noexcept + : set(other.set) { + other.set.ptr = nullptr; + } + ~PyFrozenRewritePatternSet() { + if (set.ptr != nullptr) + mlirFrozenRewritePatternSetDestroy(set); + } + MlirFrozenRewritePatternSet get() { return set; } + + pybind11::object getCapsule() { + return py::reinterpret_steal<py::object>( + mlirPythonFrozenRewritePatternSetToCapsule(get())); + } + + static pybind11::object createFromCapsule(pybind11::object capsule) { + MlirFrozenRewritePatternSet rawPm = + mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); + if (rawPm.ptr == nullptr) + throw py::error_already_set(); + return py::cast(PyFrozenRewritePatternSet(rawPm), + py::return_value_policy::move); + } + +private: + MlirFrozenRewritePatternSet set; +}; + +} // namespace + +/// Create the `mlir.rewrite` submodule here. +void mlir::python::populateRewriteSubmodule(py::module &m) { + //---------------------------------------------------------------------------- + // Mapping of the PDL pattern module and frozen rewrite pattern sets. + //---------------------------------------------------------------------------- +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH + py::class_<PyPDLPatternModule>(m, "PDLModule", py::module_local()) + .def(py::init<>([](MlirModule module) { + return mlirPDLPatternModuleFromModule(module); + }), + "module"_a, "Create a PDL module from the given module.") + .def("freeze", [](PyPDLPatternModule &self) { + return new PyFrozenRewritePatternSet(mlirFreezeRewritePattern( + mlirRewritePatternSetFromPDLPatternModule(self.get()))); + }); +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + py::class_<PyFrozenRewritePatternSet>(m, "FrozenRewritePatternSet", + py::module_local()) + .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, + &PyFrozenRewritePatternSet::getCapsule) + .def(MLIR_PYTHON_CAPI_FACTORY_ATTR, + &PyFrozenRewritePatternSet::createFromCapsule); + m.def( + "apply_patterns_and_fold_greedily", + [](MlirModule module, MlirFrozenRewritePatternSet set) { + auto status = mlirApplyPatternsAndFoldGreedily(module, set, {}); + if (mlirLogicalResultIsFailure(status)) + // FIXME: Not sure this is the right error to throw here. + throw py::value_error("pattern application failed to converge"); + }, + "module"_a, "set"_a, + "Applies the given patterns to the given module greedily while folding " + "results."); +} diff --git a/mlir/lib/Bindings/Python/Rewrite.h b/mlir/lib/Bindings/Python/Rewrite.h new file mode 100644 index 00000000000000..997b80adda3038 --- /dev/null +++ b/mlir/lib/Bindings/Python/Rewrite.h @@ -0,0 +1,22 @@ +//===- Rewrite.h - Rewrite Submodules of pybind module --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_BINDINGS_PYTHON_REWRITE_H +#define MLIR_BINDINGS_PYTHON_REWRITE_H + +#include "PybindUtils.h" + +namespace mlir { +namespace python { + +void populateRewriteSubmodule(pybind11::module &m); + +} // namespace python +} // namespace mlir + +#endif // MLIR_BINDINGS_PYTHON_REWRITE_H diff --git a/mlir/lib/CAPI/Transforms/CMakeLists.txt b/mlir/lib/CAPI/Transforms/CMakeLists.txt index 2638025a8c359a..6c67aa09fdf402 100644 --- a/mlir/lib/CAPI/Transforms/CMakeLists.txt +++ b/mlir/lib/CAPI/Transforms/CMakeLists.txt @@ -1,6 +1,9 @@ add_mlir_upstream_c_api_library(MLIRCAPITransforms Passes.cpp + Rewrite.cpp LINK_LIBS PUBLIC + MLIRIR MLIRTransforms + MLIRTransformUtils ) diff --git a/mlir/lib/CAPI/Transforms/Rewrite.cpp b/mlir/lib/CAPI/Transforms/Rewrite.cpp new file mode 100644 index 00000000000000..0de1958398f63e --- /dev/null +++ b/mlir/lib/CAPI/Transforms/Rewrite.cpp @@ -0,0 +1,83 @@ +//===- Rewrite.cpp - C API for Rewrite Patterns ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Rewrite.h" +#include "mlir-c/Transforms.h" +#include "mlir/CAPI/IR.h" +#include "mlir/CAPI/Support.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Rewrite/FrozenRewritePatternSet.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; + +inline mlir::RewritePatternSet &unwrap(MlirRewritePatternSet module) { + assert(module.ptr && "unexpected null module"); + return *(static_cast<mlir::RewritePatternSet *>(module.ptr)); +} + +inline MlirRewritePatternSet wrap(mlir::RewritePatternSet *module) { + return {module}; +} + +inline mlir::FrozenRewritePatternSet * +unwrap(MlirFrozenRewritePatternSet module) { + assert(module.ptr && "unexpected null module"); + return static_cast<mlir::FrozenRewritePatternSet *>(module.ptr); +} + +inline MlirFrozenRewritePatternSet wrap(mlir::FrozenRewritePatternSet *module) { + return {module}; +} + +MlirFrozenRewritePatternSet mlirFreezeRewritePattern(MlirRewritePatternSet op) { + auto *m = new mlir::FrozenRewritePatternSet(std::move(unwrap(op))); + op.ptr = nullptr; + return wrap(m); +} + +void mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op) { + delete unwrap(op); + op.ptr = nullptr; +} + +MlirLogicalResult +mlirApplyPatternsAndFoldGreedily(MlirModule op, + MlirFrozenRewritePatternSet patterns, + MlirGreedyRewriteDriverConfig) { + return wrap( + mlir::applyPatternsAndFoldGreedily(unwrap(op), *unwrap(patterns))); +} + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +inline mlir::PDLPatternModule *unwrap(MlirPDLPatternModule module) { + assert(module.ptr && "unexpected null module"); + return static_cast<mlir::PDLPatternModule *>(module.ptr); +} + +inline MlirPDLPatternModule wrap(mlir::PDLPatternModule *module) { + return {module}; +} + +MlirPDLPatternModule mlirPDLPatternModuleFromModule(MlirModule op) { + return wrap(new mlir::PDLPatternModule( + mlir::OwningOpRef<mlir::ModuleOp>(unwrap(op)))); +} + +void mlirPDLPatternModuleDestroy(MlirPDLPatternModule op) { + delete unwrap(op); + op.ptr = nullptr; +} + +MlirRewritePatternSet +mlirRewritePatternSetFromPDLPatternModule(MlirPDLPatternModule op) { + auto *m = new mlir::RewritePatternSet(std::move(*unwrap(op))); + op.ptr = nullptr; + return wrap(m); +} 
+#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index d8f2d1989fdea7..d03036e17749d4 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -21,6 +21,7 @@ declare_mlir_python_sources(MLIRPythonSources.Core.Python _mlir_libs/__init__.py ir.py passmanager.py + rewrite.py dialects/_ods_common.py # The main _mlir module has submodules: include stubs from each. @@ -448,6 +449,7 @@ declare_mlir_python_extension(MLIRPythonExtension.Core IRModule.cpp IRTypes.cpp Pass.cpp + Rewrite.cpp # Headers must be included explicitly so they are installed. Globals.h diff --git a/mlir/python/mlir/dialects/pdl.py b/mlir/python/mlir/dialects/pdl.py index db07dc50aabd79..b7b8430cebd07a 100644 --- a/mlir/python/mlir/dialects/pdl.py +++ b/mlir/python/mlir/dialects/pdl.py @@ -6,7 +6,7 @@ from ._pdl_ops_gen import _Dialect from .._mlir_libs._mlirDialectsPDL import * from .._mlir_libs._mlirDialectsPDL import OperationType - +from ..extras.meta import region_op try: from ..ir import * @@ -127,6 +127,9 @@ def body(self): return self.regions[0].blocks[0] +pattern = region_op(PatternOp.__base__) + + @_ods_cext.register_operation(_Dialect, replace=True) class ReplaceOp(ReplaceOp): """Specialization for PDL replace op class.""" @@ -195,6 +198,9 @@ def body(self): return self.regions[0].blocks[0] +rewrite = region_op(RewriteOp) + + @_ods_cext.register_operation(_Dialect, replace=True) class TypeOp(TypeOp): """Specialization for PDL type op class.""" diff --git a/mlir/python/mlir/rewrite.py b/mlir/python/mlir/rewrite.py new file mode 100644 index 00000000000000..5bc1bba7ae9a72 --- /dev/null +++ b/mlir/python/mlir/rewrite.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._mlir_libs._mlir.rewrite import * diff --git a/mlir/test/python/integration/dialects/pdl.py b/mlir/test/python/integration/dialects/pdl.py new file mode 100644 index 00000000000000..923af29a71ad75 --- /dev/null +++ b/mlir/test/python/integration/dialects/pdl.py @@ -0,0 +1,67 @@ +# RUN: %PYTHON %s 2>&1 | FileCheck %s + +from mlir.dialects import arith, func, pdl +from mlir.dialects.builtin import module +from mlir.ir import * +from mlir.rewrite import * + + +def construct_and_print_in_module(f): + print("\nTEST:", f.__name__) + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + module = f(module) + if module is not None: + print(module) + return f + + +# CHECK-LABEL: TEST: test_add_to_mul +# CHECK: arith.muli +@construct_and_print_in_module +def test_add_to_mul(module_): + index_type = IndexType.get() + + # Create a test case. + @module(sym_name="ir") + def ir(): + @func.func(index_type, index_type) + def add_func(a, b): + return arith.addi(a, b) + + # Create a rewrite from add to mul. This will match + # - operation name is arith.addi + # - operands are index types. + # - there are two operands. + with Location.unknown(): + m = Module.create() + with InsertionPoint(m.body): + # Change all arith.addi with index types to arith.muli. + @pdl.pattern(benefit=1, sym_name="addi_to_mul") + def pat(): + # Match arith.addi with index types. 
+ index_type = pdl.TypeOp(IndexType.get()) + operand0 = pdl.OperandOp(index_type) + operand1 = pdl.OperandOp(index_type) + op0 = pdl.OperationOp( + name="arith.addi", args=[operand0, operand1], types=[index_type] + ) + + # Replace the matched op with arith.muli. + @pdl.rewrite() + def rew(): + newOp = pdl.OperationOp( + name="arith.muli", args=[operand0, operand1], types=[index_type] + ) + pdl.ReplaceOp(op0, with_op=newOp) + + # Create a PDL module from the module and freeze it. At this point the + # ownership of the module is transferred to the PDL module. This ownership + # transfer is not yet captured on the Python side and has sharp edges, so + # it is best to construct the module and the PDL module in the same scope. + # FIXME: This should be made more robust. + frozen = PDLModule(m).freeze() + # The frozen pattern set could be applied multiple times. + apply_patterns_and_fold_greedily(module_, frozen) + return module_ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 0254e127980e58..9eda1a2b4c7e1b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -420,6 +420,7 @@ mlir_c_api_cc_library( "include/mlir-c/Interfaces.h", "include/mlir-c/Pass.h", "include/mlir-c/RegisterEverything.h", + "include/mlir-c/Rewrite.h", "include/mlir-c/Support.h", "include/mlir/CAPI/AffineExpr.h", "include/mlir/CAPI/AffineMap.h", @@ -866,7 +867,10 @@ mlir_c_api_cc_library( mlir_c_api_cc_library( name = "CAPITransforms", - srcs = ["lib/CAPI/Transforms/Passes.cpp"], + srcs = [ + "lib/CAPI/Transforms/Passes.cpp", + "lib/CAPI/Transforms/Rewrite.cpp", + ], hdrs = ["include/mlir-c/Transforms.h"], capi_deps = [ ":CAPIIR", @@ -876,7 +880,10 @@ mlir_c_api_cc_library( ], includes = ["include"], deps = [ + ":IR", ":Pass", + ":Rewrite", + ":TransformUtils", ":Transforms", ], ) @@ -939,6 +946,7 @@ cc_library( textual_hdrs = glob(MLIR_BINDINGS_PYTHON_HEADERS), deps = [ ":CAPIIRHeaders", + ":CAPITransformsHeaders", "@local_config_python//:python_headers", "@pybind11", ], @@ -957,6 +965,7 @@ cc_library( textual_hdrs = glob(MLIR_BINDINGS_PYTHON_HEADERS), deps = [ ":CAPIIR", + ":CAPITransforms", "@local_config_python//:python_headers", "@pybind11", ], @@ -981,6 +990,7 @@ MLIR_PYTHON_BINDINGS_SOURCES = [ "lib/Bindings/Python/IRModule.cpp", "lib/Bindings/Python/IRTypes.cpp", "lib/Bindings/Python/Pass.cpp", + "lib/Bindings/Python/Rewrite.cpp", ] cc_library( diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index add150de69faf4..254cab0db4a5d6 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -82,6 +82,13 @@ filegroup( ], ) +filegroup( + name = "RewritePyFiles", + srcs = [ + "mlir/rewrite.py", + ], +) + filegroup( name = "RuntimePyFiles", srcs = glob([ From b746babb22bdcd945b7665d63472c9d8695893d0 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 11 Jun 2024 16:51:33 +0200 Subject: [PATCH 03/38] Restore 'REQUIRES: shell' for another test after 878deae Otherwise this would fail when using gnuwin32. 
--- llvm/test/tools/llvm-rc/windres-prefix.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/tools/llvm-rc/windres-prefix.test b/llvm/test/tools/llvm-rc/windres-prefix.test index 7dda51d0635232..4c53fdfc3db65e 100644 --- a/llvm/test/tools/llvm-rc/windres-prefix.test +++ b/llvm/test/tools/llvm-rc/windres-prefix.test @@ -1,3 +1,5 @@ +; REQUIRES: shell + ; RUN: rm -rf %t && mkdir %t ; Check that a triple prefix on the executable gets picked up as target triple. From f5dcfb9968a3d4945b5fa7d142044c29c9a9f175 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Tue, 11 Jun 2024 17:04:48 +0200 Subject: [PATCH 04/38] [libc][math][c23] Add {totalorder,totalordermag}f16 C23 math functions (#95014) Part of #93566. --- libc/config/linux/aarch64/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 2 + libc/docs/c23.rst | 4 +- libc/docs/math/index.rst | 4 + libc/spec/stdc.td | 4 + libc/src/__support/FPUtil/BasicOperations.h | 25 +++ libc/src/math/CMakeLists.txt | 4 + libc/src/math/generic/CMakeLists.txt | 24 +++ libc/src/math/generic/totalorderf16.cpp | 19 +++ libc/src/math/generic/totalordermagf16.cpp | 20 +++ libc/src/math/totalorderf16.h | 20 +++ libc/src/math/totalordermagf16.h | 20 +++ libc/test/UnitTest/FPMatcher.h | 6 +- libc/test/src/math/smoke/CMakeLists.txt | 24 +++ libc/test/src/math/smoke/TotalOrderMagTest.h | 142 ++++++++++++++++++ libc/test/src/math/smoke/TotalOrderTest.h | 138 +++++++++++++++++ .../src/math/smoke/totalorderf16_test.cpp | 13 ++ .../src/math/smoke/totalordermagf16_test.cpp | 13 ++ 18 files changed, 480 insertions(+), 4 deletions(-) create mode 100644 libc/src/math/generic/totalorderf16.cpp create mode 100644 libc/src/math/generic/totalordermagf16.cpp create mode 100644 libc/src/math/totalorderf16.h create mode 100644 libc/src/math/totalordermagf16.h create mode 100644 libc/test/src/math/smoke/TotalOrderMagTest.h create mode 100644 libc/test/src/math/smoke/TotalOrderTest.h create mode 100644 libc/test/src/math/smoke/totalorderf16_test.cpp create mode 100644 libc/test/src/math/smoke/totalordermagf16_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 381061ce3fcbf0..db96a80051a8dd 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -541,6 +541,8 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 + libc.src.math.totalorderf16 + libc.src.math.totalordermagf16 libc.src.math.truncf16 libc.src.math.ufromfpf16 libc.src.math.ufromfpxf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index e99960b12441da..355eaf33ace6d1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -571,6 +571,8 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 + libc.src.math.totalorderf16 + libc.src.math.totalordermagf16 libc.src.math.truncf16 libc.src.math.ufromfpf16 libc.src.math.ufromfpxf16 diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst index fec9b24bbd5815..4134befd1ed358 100644 --- a/libc/docs/c23.rst +++ b/libc/docs/c23.rst @@ -42,8 +42,8 @@ Additions: * rsqrt* * __STDC_IEC_60559_DFP__ functions (_Decimal32, _Decimal64, _Decimal128) * compoundn* - * totalorder* - * totalordermag* + * totalorder* |check| + * totalordermag* |check| * getpayload* * setpayload* * iscannonical diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 
f83a646c34b57c..d556885eda6223 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -210,6 +210,10 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | scalbn | |check| | |check| | |check| | | |check| | 7.12.6.19 | F.10.3.19 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| totalorder | | | | |check| | | F.10.12.1 | N/A | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| totalordermag | | | | |check| | | F.10.12.2 | N/A | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | trunc | |check| | |check| | |check| | |check| | |check| | 7.12.9.9 | F.10.6.9 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | ufromfp | |check| | |check| | |check| | |check| | |check| | 7.12.9.10 | F.10.6.10 | diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 34169948fc6d27..b134ec00a7d7a2 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -710,6 +710,10 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"canonicalizel", RetValSpec<IntType>, [ArgSpec<LongDoublePtr>, ArgSpec<LongDoublePtr>]>, GuardedFunctionSpec<"canonicalizef16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"canonicalizef128", RetValSpec<IntType>, [ArgSpec<Float128Ptr>, ArgSpec<Float128Ptr>], "LIBC_TYPES_HAS_FLOAT128">, + + GuardedFunctionSpec<"totalorderf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">, + + GuardedFunctionSpec<"totalordermagf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">, ] >; diff --git a/libc/src/__support/FPUtil/BasicOperations.h b/libc/src/__support/FPUtil/BasicOperations.h index e5ac101fedc0e3..beb8e48db8f51b 100644 --- a/libc/src/__support/FPUtil/BasicOperations.h +++ b/libc/src/__support/FPUtil/BasicOperations.h @@ -240,6 +240,31 @@ LIBC_INLINE int canonicalize(T &cx, const T &x) { return 0; } +template <typename T> +LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, bool> +totalorder(T x, T y) { + using FPBits = FPBits<T>; + FPBits x_bits(x); + FPBits y_bits(y); + + using StorageType = typename FPBits::StorageType; + StorageType x_u = x_bits.uintval(); + StorageType y_u = y_bits.uintval(); + + using signed_t = cpp::make_signed_t<StorageType>; + signed_t x_signed = static_cast<signed_t>(x_u); + signed_t y_signed = static_cast<signed_t>(y_u); + + bool both_neg = (x_u & y_u & FPBits::SIGN_MASK) != 0; + return x_signed == y_signed || ((x_signed <= y_signed) != both_neg); +} + +template <typename T> +LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, bool> +totalordermag(T x, T y) { + return FPBits<T>(x).abs().uintval() <= FPBits<T>(y).abs().uintval(); +} + } // namespace fputil } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 82dfdaf479ff00..2446c293b8ef5a 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -369,6 +369,10 @@ add_math_entrypoint_object(tanhf) add_math_entrypoint_object(tgamma) add_math_entrypoint_object(tgammaf) +add_math_entrypoint_object(totalorderf16) + +add_math_entrypoint_object(totalordermagf16) + 
add_math_entrypoint_object(trunc) add_math_entrypoint_object(truncf) add_math_entrypoint_object(truncl) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index f4f683e61bd658..673bef516b13d1 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3577,3 +3577,27 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 ) + +add_entrypoint_object( + totalorderf16 + SRCS + totalorderf16.cpp + HDRS + ../totalorderf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + totalordermagf16 + SRCS + totalordermagf16.cpp + HDRS + ../totalordermagf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + COMPILE_OPTIONS + -O3 +) diff --git a/libc/src/math/generic/totalorderf16.cpp b/libc/src/math/generic/totalorderf16.cpp new file mode 100644 index 00000000000000..e43beb33d2fd3d --- /dev/null +++ b/libc/src/math/generic/totalorderf16.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of totalorderf16 function --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/totalorderf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, totalorderf16, (const float16 *x, const float16 *y)) { + return static_cast<int>(fputil::totalorder(*x, *y)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/totalordermagf16.cpp b/libc/src/math/generic/totalordermagf16.cpp new file mode 100644 index 00000000000000..09d04fbeb2d2c6 --- /dev/null +++ b/libc/src/math/generic/totalordermagf16.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of totalordermagf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/totalordermagf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, totalordermagf16, + (const float16 *x, const float16 *y)) { + return static_cast<int>(fputil::totalordermag(*x, *y)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/totalorderf16.h b/libc/src/math/totalorderf16.h new file mode 100644 index 00000000000000..f5390140c4dc2e --- /dev/null +++ b/libc/src/math/totalorderf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for totalorderf16 -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_TOTALORDERF16_H +#define LLVM_LIBC_SRC_MATH_TOTALORDERF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +int totalorderf16(const float16 *x, const float16 *y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_TOTALORDERF16_H diff --git a/libc/src/math/totalordermagf16.h b/libc/src/math/totalordermagf16.h new file mode 100644 index 00000000000000..8c6621b9783dfb --- /dev/null +++ b/libc/src/math/totalordermagf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for totalordermagf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H +#define LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +int totalordermagf16(const float16 *x, const float16 *y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h index 26af5cec02b587..86b823260e1979 100644 --- a/libc/test/UnitTest/FPMatcher.h +++ b/libc/test/UnitTest/FPMatcher.h @@ -97,8 +97,10 @@ template <typename T> struct FPTest : public Test { LIBC_NAMESPACE::cpp::numeric_limits<StorageType>::max(); \ const T zero = FPBits::zero(Sign::POS).get_val(); \ const T neg_zero = FPBits::zero(Sign::NEG).get_val(); \ - const T aNaN = FPBits::quiet_nan().get_val(); \ - const T sNaN = FPBits::signaling_nan().get_val(); \ + const T aNaN = FPBits::quiet_nan(Sign::POS).get_val(); \ + const T neg_aNaN = FPBits::quiet_nan(Sign::NEG).get_val(); \ + const T sNaN = FPBits::signaling_nan(Sign::POS).get_val(); \ + const T neg_sNaN = FPBits::signaling_nan(Sign::NEG).get_val(); \ const T inf = FPBits::inf(Sign::POS).get_val(); \ const T neg_inf = FPBits::inf(Sign::NEG).get_val(); \ const T min_normal = FPBits::min_normal().get_val(); \ diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 75e2bdd7be100a..68cd412b14e9d3 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3519,3 +3519,27 @@ add_fp_unittest( libc.src.math.powf libc.src.__support.FPUtil.fp_bits ) + +add_fp_unittest( + totalorderf16_test + SUITE + libc-math-smoke-tests + SRCS + totalorderf16_test.cpp + HDRS + TotalOrderTest.h + DEPENDS + libc.src.math.totalorderf16 +) + +add_fp_unittest( + totalordermagf16_test + SUITE + libc-math-smoke-tests + SRCS + totalordermagf16_test.cpp + HDRS + TotalOrderMagTest.h + DEPENDS + libc.src.math.totalordermagf16 +) diff --git a/libc/test/src/math/smoke/TotalOrderMagTest.h b/libc/test/src/math/smoke/TotalOrderMagTest.h new file mode 100644 index 00000000000000..5fe2983a0e678b --- /dev/null +++ b/libc/test/src/math/smoke/TotalOrderMagTest.h @@ -0,0 +1,142 @@ +//===-- Utility class to test flavors of totalordermag ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H +#define LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +template <typename T> +class TotalOrderMagTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*TotalOrderMagFunc)(const T *, const T *); + + bool funcWrapper(TotalOrderMagFunc func, T x, T y) { + return func(&x, &y) != 0; + } + + void testXLesserThanY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_inf, inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(123.38))); + + EXPECT_FALSE(funcWrapper(func, T(-0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(-123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(123.38))); + } + + void testXGreaterThanY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(-123.38))); + } + + void testXEqualToY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, inf)); + EXPECT_TRUE(funcWrapper(func, neg_inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(-0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(123.38))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(-123.38))); + } + + void testSingleNaN(TotalOrderMagFunc func) { + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(123.38))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), neg_aNaN)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), aNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(123.38))); + } + + void testNaNSigns(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, sNaN)); + + EXPECT_TRUE(funcWrapper(func, aNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, neg_sNaN)); + } + + void testQuietVsSignalingNaN(TotalOrderMagFunc func) { + EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, sNaN)); + } + + void testNaNPayloads(TotalOrderMagFunc func) { + T qnan_123 = 
FPBits::quiet_nan(Sign::POS, 0x123).get_val(); + T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val(); + T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val(); + T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val(); + + EXPECT_TRUE(funcWrapper(func, aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123)); + EXPECT_TRUE(funcWrapper(func, sNaN, snan_123)); + EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN)); + EXPECT_FALSE(funcWrapper(func, snan_123, sNaN)); + + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_qnan_123)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_snan_123)); + EXPECT_FALSE(funcWrapper(func, neg_qnan_123, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, neg_snan_123, neg_sNaN)); + } +}; + +#define LIST_TOTALORDERMAG_TESTS(T, func) \ + using LlvmLibcTotalOrderMagTest = TotalOrderMagTestTemplate<T>; \ + TEST_F(LlvmLibcTotalOrderMagTest, XLesserThanY) { testXLesserThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, XGreaterThanY) { \ + testXGreaterThanY(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderMagTest, XEqualToY) { testXEqualToY(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, SingleNaN) { testSingleNaN(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, NaNSigns) { testNaNSigns(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, QuietVsSignalingNaN) { \ + testQuietVsSignalingNaN(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderMagTest, NaNPayloads) { testNaNPayloads(&func); } + +#endif // LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H diff --git a/libc/test/src/math/smoke/TotalOrderTest.h b/libc/test/src/math/smoke/TotalOrderTest.h new file mode 100644 index 00000000000000..281b2a59f930db --- /dev/null +++ b/libc/test/src/math/smoke/TotalOrderTest.h @@ -0,0 +1,138 @@ +//===-- Utility class to test different flavors of totalorder ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H +#define LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +template <typename T> +class TotalOrderTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*TotalOrderFunc)(const T *, const T *); + + bool funcWrapper(TotalOrderFunc func, T x, T y) { return func(&x, &y) != 0; } + + void testXLesserThanY(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_inf, inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(123.38))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(123.38))); + } + + void testXGreaterThanY(TotalOrderFunc func) { + EXPECT_FALSE(funcWrapper(func, inf, neg_inf)); + + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-0.1))); + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(0.0))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(-0.1))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(-123.38))); + } + + void testXEqualToY(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, inf)); + EXPECT_TRUE(funcWrapper(func, neg_inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(-0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(123.38))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(-123.38))); + } + + void testSingleNaN(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(0.0))); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(0.1))); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.0), neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, T(0.1), neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, T(123.38), neg_aNaN)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), aNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(123.38))); + } + + void testNaNSigns(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, sNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, sNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, sNaN, neg_sNaN)); + } + + void testQuietVsSignalingNaN(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, sNaN)); + } + + void testNaNPayloads(TotalOrderFunc func) { + T qnan_123 = FPBits::quiet_nan(Sign::POS, 
0x123).get_val(); + T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val(); + T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val(); + T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val(); + + EXPECT_TRUE(funcWrapper(func, aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123)); + EXPECT_TRUE(funcWrapper(func, sNaN, snan_123)); + EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN)); + EXPECT_FALSE(funcWrapper(func, snan_123, sNaN)); + + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_qnan_123)); + EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_snan_123)); + EXPECT_TRUE(funcWrapper(func, neg_qnan_123, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_snan_123, neg_sNaN)); + } +}; + +#define LIST_TOTALORDER_TESTS(T, func) \ + using LlvmLibcTotalOrderTest = TotalOrderTestTemplate<T>; \ + TEST_F(LlvmLibcTotalOrderTest, XLesserThanY) { testXLesserThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, XGreaterThanY) { testXGreaterThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, XEqualToY) { testXEqualToY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, SingleNaN) { testSingleNaN(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, NaNSigns) { testNaNSigns(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, QuietVsSignalingNaN) { \ + testQuietVsSignalingNaN(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderTest, NaNPayloads) { testNaNPayloads(&func); } + +#endif // LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H diff --git a/libc/test/src/math/smoke/totalorderf16_test.cpp b/libc/test/src/math/smoke/totalorderf16_test.cpp new file mode 100644 index 00000000000000..410c70c47c51d8 --- /dev/null +++ b/libc/test/src/math/smoke/totalorderf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for totalorderf16 ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TotalOrderTest.h" + +#include "src/math/totalorderf16.h" + +LIST_TOTALORDER_TESTS(float16, LIBC_NAMESPACE::totalorderf16) diff --git a/libc/test/src/math/smoke/totalordermagf16_test.cpp b/libc/test/src/math/smoke/totalordermagf16_test.cpp new file mode 100644 index 00000000000000..b09eb11cd9c3bb --- /dev/null +++ b/libc/test/src/math/smoke/totalordermagf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for totalordermagf16 ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TotalOrderMagTest.h" + +#include "src/math/totalordermagf16.h" + +LIST_TOTALORDERMAG_TESTS(float16, LIBC_NAMESPACE::totalordermagf16) From 3f3e85cd234c1a57021b6622590f75d42054ef12 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 11 Jun 2024 16:38:53 +0100 Subject: [PATCH 05/38] [X86] ICMP EQ/NE MIN_SIGNED_INT - avoid immediate argument by using NEG + SETO/SETNO (#94948) For i64 this avoids loading a 64-bit value into a register, for smaller registers this just avoids an immediate operand. 
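The identity being exploited: in two's complement, NEG overflows exactly when its operand is the minimum signed value, so the OF flag after a NEG answers x == INT_MIN directly. A quick illustrative check of the identity (hypothetical Python helper, not part of the patch):

    INT64_MIN = -(1 << 63)

    def neg_overflows_64(x: int) -> bool:
        # Emulate 64-bit wrap-around negation, as the NEG instruction does.
        neg = (-x) & ((1 << 64) - 1)
        signed = neg - (1 << 64) if neg >= (1 << 63) else neg
        # NEG overflows iff wrap-around negation returns the operand itself
        # (excluding 0, whose negation is exact).
        return signed == x and x != 0

    assert all(neg_overflows_64(v) == (v == INT64_MIN)
               for v in (INT64_MIN, INT64_MIN + 1, -1, 0, 1))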
For i8 and i16, limit this to single-use cases, as we save fewer bytes and the savings can be wasted entirely on extra register moves. Fixes #67709 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++++ llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll | 4 ++-- llvm/test/CodeGen/X86/abs.ll | 13 +++---------- llvm/test/CodeGen/X86/combine-sdiv.ll | 4 ++-- llvm/test/CodeGen/X86/is_fpclass.ll | 18 ++++++++++-------- llvm/test/CodeGen/X86/lsr-overflow.ll | 6 +++--- llvm/test/CodeGen/X86/shrink-compare-pgso.ll | 4 ++-- llvm/test/CodeGen/X86/shrink-compare.ll | 4 ++-- 8 files changed, 38 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cad3ea4716db3e..0d79e4eb3f75a0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23816,6 +23816,20 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, } } + // Look for X == INT_MIN or X != INT_MIN. We can use NEG and test for + // overflow. + if (isMinSignedConstant(Op1)) { + EVT VT = Op0.getValueType(); + if (VT == MVT::i32 || VT == MVT::i64 || Op0->hasOneUse()) { + SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32); + X86::CondCode CondCode = CC == ISD::SETEQ ? X86::COND_O : X86::COND_NO; + X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); + SDValue Neg = DAG.getNode(X86ISD::SUB, dl, CmpVTs, + DAG.getConstant(0, dl, VT), Op0); + return SDValue(Neg.getNode(), 1); + } + } + // Try to use the carry flag from the add in place of a separate CMP for: // (seteq (add X, -1), -1). Similar for setne. if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD && diff --git a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll index feaa38a7600a21..00ffea903079ef 100644 --- a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll +++ b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll @@ -8,8 +8,8 @@ define i16 @test(ptr %tmp179) nounwind { ; CHECK-NEXT: movzwl (%eax), %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: andl $64512, %ecx ## imm = 0xFC00 -; CHECK-NEXT: cmpl $32768, %ecx ## imm = 0x8000 -; CHECK-NEXT: jne LBB0_2 +; CHECK-NEXT: negw %cx +; CHECK-NEXT: jno LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %bb189 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index dde877c5bb61e5..bae140abdf6b1a 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -783,8 +783,7 @@ define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: negl %eax ; X64-NEXT: cmovsl %edi, %eax -; X64-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 -; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: cmovol %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: test_minsigned_i32: @@ -793,11 +792,7 @@ define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: negl %eax ; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: cmpl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: jne .LBB19_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB19_2: +; X86-NEXT: cmovol {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl %lim = icmp eq i32 %a0, -2147483648 %abs = tail call i32 @llvm.abs.i32(i32 %a0, i1 false) @@ -811,9 +806,7 @@ define i64 @test_minsigned_i64(i64 %a0, i64 %a1) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: negq %rax ; X64-NEXT: cmovsq %rdi, %rax -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: 
cmpq %rcx, %rdi -; X64-NEXT: cmoveq %rsi, %rax +; X64-NEXT: cmovoq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: test_minsigned_i64: diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 49797fbefa5973..5c5487815b3360 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -58,8 +58,8 @@ define i32 @combine_sdiv_by_minsigned(i32 %x) { ; CHECK-LABEL: combine_sdiv_by_minsigned: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 -; CHECK-NEXT: sete %al +; CHECK-NEXT: negl %edi +; CHECK-NEXT: seto %al ; CHECK-NEXT: retq %1 = sdiv i32 %x, -2147483648 ret i32 %1 diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 999be0f98b6fc5..532b2c09a9175f 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -937,15 +937,16 @@ entry: define i1 @is_minus_zero_f(float %x) { ; X86-LABEL: is_minus_zero_f: ; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X86-NEXT: seto %al ; X86-NEXT: retl ; ; X64-LABEL: is_minus_zero_f: ; X64: # %bb.0: # %entry ; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: sete %al +; X64-NEXT: negl %eax +; X64-NEXT: seto %al ; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 32) ; 0x20 = "-zero" @@ -955,15 +956,16 @@ entry: define i1 @not_is_minus_zero_f(float %x) { ; X86-LABEL: not_is_minus_zero_f: ; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; X86-NEXT: setne %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setno %al ; X86-NEXT: retl ; ; X64-LABEL: not_is_minus_zero_f: ; X64: # %bb.0: # %entry ; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: setne %al +; X64-NEXT: negl %eax +; X64-NEXT: setno %al ; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 991) ; ~0x20 = ~"-zero" diff --git a/llvm/test/CodeGen/X86/lsr-overflow.ll b/llvm/test/CodeGen/X86/lsr-overflow.ll index 09c1c07ef3de01..79440c282be758 100644 --- a/llvm/test/CodeGen/X86/lsr-overflow.ll +++ b/llvm/test/CodeGen/X86/lsr-overflow.ll @@ -4,9 +4,9 @@ ; The comparison uses the pre-inc value, which could lead LSR to ; try to compute -INT64_MIN. 
-; CHECK: movabsq $-9223372036854775808, %rax
-; CHECK: cmpq %rax,
-; CHECK: sete %al
+; CHECK-NOT: movabsq $-9223372036854775808, %rax
+; CHECK: negq %r
+; CHECK-NEXT: seto %al
 
 declare i64 @bar()
 
diff --git a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
index 254b8fe3fc6e30..5a15ee36c07263 100644
--- a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
+++ b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
@@ -265,8 +265,8 @@ if.end:
 define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind !prof !14 {
 ; CHECK-LABEL: test_sext_i8_icmp_neg128:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cmpb $-128, %dil
-; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: jo bar # TAILCALL
 ; CHECK-NEXT: # %bb.1: # %if.end
 ; CHECK-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll
index 840167ff9f4a0c..1a61451c26a03c 100644
--- a/llvm/test/CodeGen/X86/shrink-compare.ll
+++ b/llvm/test/CodeGen/X86/shrink-compare.ll
@@ -265,8 +265,8 @@ if.end:
 define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize {
 ; CHECK-LABEL: test_sext_i8_icmp_neg128:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cmpb $-128, %dil
-; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: jo bar # TAILCALL
 ; CHECK-NEXT: # %bb.1: # %if.end
 ; CHECK-NEXT: retq
 entry:

From 00c5474918dac1a382c63628ed0844a354a0761f Mon Sep 17 00:00:00 2001
From: Florian Mayer
Date: Tue, 11 Jun 2024 09:06:07 -0700
Subject: [PATCH 06/38] [HWASan] make get_info.local_time.pass.cpp UNSUPPORTED

---
 .../time.zone.members/get_info.local_time.pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
index a8c468a6c6fd4d..ec3e490c0ed790 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
@@ -12,7 +12,7 @@
 // XFAIL: libcpp-has-no-experimental-tzdb
 // XFAIL: availability-tzdb-missing
 // Times out under HWASan
-// XFAIL: hwasan
+// UNSUPPORTED: hwasan
 //

From 4cf607fa15fd9ccd79115095a1eb02e0cd83e1a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Tue, 11 Jun 2024 18:12:37 +0200
Subject: [PATCH 07/38] [clang][Interp] Fix visiting non-FieldDecl MemberExprs

Ignore the base and visit the Member decl like a regular DeclRefExpr.
---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp    | 26 ++++++++++-----------
 clang/test/SemaCXX/ms-const-member-expr.cpp |  1 +
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 0385ca4b3a0639..820e4cc44a7bcd 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1494,6 +1494,9 @@ bool ByteCodeExprGen<Emitter>::VisitMemberExpr(const MemberExpr *E) {
     return false;
   }
 
+  if (!isa<FieldDecl>(Member))
+    return this->discard(Base) && this->visitDeclRef(Member, E);
+
   if (Initializing) {
     if (!this->delegate(Base))
       return false;
@@ -1503,19 +1506,16 @@ bool ByteCodeExprGen<Emitter>::VisitMemberExpr(const MemberExpr *E) {
   }
 
   // Base above gives us a pointer on the stack.
-  if (const auto *FD = dyn_cast<FieldDecl>(Member)) {
-    const RecordDecl *RD = FD->getParent();
-    const Record *R = getRecord(RD);
-    if (!R)
-      return false;
-    const Record::Field *F = R->getField(FD);
-    // Leave a pointer to the field on the stack.
-    if (F->Decl->getType()->isReferenceType())
-      return this->emitGetFieldPop(PT_Ptr, F->Offset, E) && maybeLoadValue();
-    return this->emitGetPtrFieldPop(F->Offset, E) && maybeLoadValue();
-  }
-
-  return false;
+  const auto *FD = cast<FieldDecl>(Member);
+  const RecordDecl *RD = FD->getParent();
+  const Record *R = getRecord(RD);
+  if (!R)
+    return false;
+  const Record::Field *F = R->getField(FD);
+  // Leave a pointer to the field on the stack.
+  if (F->Decl->getType()->isReferenceType())
+    return this->emitGetFieldPop(PT_Ptr, F->Offset, E) && maybeLoadValue();
+  return this->emitGetPtrFieldPop(F->Offset, E) && maybeLoadValue();
 }
 
 template <class Emitter>
diff --git a/clang/test/SemaCXX/ms-const-member-expr.cpp b/clang/test/SemaCXX/ms-const-member-expr.cpp
index 72cfe76fbe43a2..8312f84b550f00 100644
--- a/clang/test/SemaCXX/ms-const-member-expr.cpp
+++ b/clang/test/SemaCXX/ms-const-member-expr.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -std=c++11 -fms-compatibility -fsyntax-only -verify
+// RUN: %clang_cc1 %s -std=c++11 -fms-compatibility -fsyntax-only -verify -fexperimental-new-constant-interpreter
 
 struct S {
   enum { E = 1 };

From de19f7b6d46f1c38e10e604154f0fdaaffde9ebd Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Tue, 11 Jun 2024 09:18:31 -0700
Subject: [PATCH 08/38] [MC] Replace fragment ilist with singly-linked lists

Fragments are allocated with `operator new` and stored in an ilist with
Prev/Next/Parent pointers. A more efficient representation would be an
array of fragments without the overhead of Prev/Next pointers. As the
first step, replace ilist with singly-linked lists.

* `getPrevNode` uses have been eliminated by previous changes.

* The last use of the `Prev` pointer remains: for each subsection,
  there is an insertion point and the current insertion point is stored
  at `CurInsertionPoint`.

* `HexagonAsmBackend::finishLayout` needs a backward iterator. Save all
  fragments within `Frags`. Hexagon programs are usually small, and the
  performance does not matter that much.

To eliminate `Prev`, change the subsection representation to
singly-linked lists for subsections and a pointer to the active
singly-linked list. The fragments from all subsections will be chained
together at layout time.

Since fragment lists are disconnected before layout time, we can remove
`MCFragment::SubsectionNumber` (https://reviews.llvm.org/D69411). The
current implementation of `AttemptToFoldSymbolOffsetDifference` requires
future improvement for robustness.
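As an illustration of the list discipline described above (a minimal
sketch with simplified names, not the patch code itself; the real
version is MCSection::addFragment in the diff below):

    // Each subsection keeps a head/tail pair for O(1) append; at layout
    // time the per-subsection lists are chained head-to-tail into one list.
    struct Fragment {
      Fragment *Next = nullptr;
    };
    struct FragList {
      Fragment *Head = nullptr;
      Fragment *Tail = nullptr;
    };
    void append(FragList &L, Fragment &F) {
      if (L.Tail)
        L.Tail->Next = &F;
      else
        L.Head = &F;
      L.Tail = &F;
    }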
Pull Request: https://github.com/llvm/llvm-project/pull/95077 --- llvm/include/llvm/MC/MCFragment.h | 17 +++-- llvm/include/llvm/MC/MCObjectStreamer.h | 3 +- llvm/include/llvm/MC/MCSection.h | 75 +++++++++++-------- llvm/lib/MC/MCAssembler.cpp | 15 +++- llvm/lib/MC/MCExpr.cpp | 31 +++----- llvm/lib/MC/MCFragment.cpp | 4 +- llvm/lib/MC/MCObjectStreamer.cpp | 11 +-- llvm/lib/MC/MCSection.cpp | 62 +++++++-------- llvm/lib/MC/WasmObjectWriter.cpp | 5 +- .../MCTargetDesc/HexagonAsmBackend.cpp | 17 +++-- .../Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp | 2 +- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 +- 12 files changed, 123 insertions(+), 125 deletions(-) diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 67e10c35894954..45599c940659e4 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -23,12 +23,15 @@ namespace llvm { +class MCAssembler; class MCSection; class MCSubtargetInfo; class MCSymbol; -class MCFragment : public ilist_node_with_parent { +class MCFragment { friend class MCAsmLayout; + friend class MCAssembler; + friend class MCSection; public: enum FragmentType : uint8_t { @@ -51,6 +54,9 @@ class MCFragment : public ilist_node_with_parent { }; private: + // The next fragment within the section. + MCFragment *Next = nullptr; + /// The data for the section this fragment is in. MCSection *Parent; @@ -64,10 +70,6 @@ class MCFragment : public ilist_node_with_parent { /// The layout order of this fragment. unsigned LayoutOrder; - /// The subsection this fragment belongs to. This is 0 if the fragment is not - // in any subsection. - unsigned SubsectionNumber = 0; - FragmentType Kind; protected: @@ -88,6 +90,8 @@ class MCFragment : public ilist_node_with_parent { /// This method will dispatch to the appropriate subclass. void destroy(); + MCFragment *getNext() const { return Next; } + FragmentType getKind() const { return Kind; } MCSection *getParent() const { return Parent; } @@ -104,9 +108,6 @@ class MCFragment : public ilist_node_with_parent { bool hasInstructions() const { return HasInstructions; } void dump() const; - - void setSubsectionNumber(unsigned Value) { SubsectionNumber = Value; } - unsigned getSubsectionNumber() const { return SubsectionNumber; } }; class MCDummyFragment : public MCFragment { diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index e212d546139808..c0a337f5ea45e5 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -41,7 +41,6 @@ class raw_pwrite_stream; /// implementation. 
class MCObjectStreamer : public MCStreamer { std::unique_ptr Assembler; - MCSection::iterator CurInsertionPoint; bool EmitEHFrame; bool EmitDebugFrame; SmallVector PendingLabels; @@ -94,7 +93,7 @@ class MCObjectStreamer : public MCStreamer { void insert(MCFragment *F) { flushPendingLabels(F); MCSection *CurSection = getCurrentSectionOnly(); - CurSection->getFragmentList().insert(CurInsertionPoint, F); + CurSection->addFragment(*F); F->setParent(CurSection); } diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 217b9b4b5bc52b..e5455292d5c625 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -14,7 +14,6 @@ #define LLVM_MC_MCSECTION_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Alignment.h" @@ -24,20 +23,18 @@ namespace llvm { class MCAsmInfo; +class MCAssembler; class MCContext; class MCExpr; class MCSymbol; class raw_ostream; class Triple; -template <> struct ilist_alloc_traits { - static void deleteNode(MCFragment *V); -}; - /// Instances of this class represent a uniqued identifier for a section in the /// current translation unit. The MCContext class uniques and creates these. class MCSection { public: + friend MCAssembler; static constexpr unsigned NonUniqueID = ~0U; enum SectionVariant { @@ -58,12 +55,29 @@ class MCSection { BundleLockedAlignToEnd }; - using FragmentListType = iplist; + struct iterator { + MCFragment *F = nullptr; + iterator() = default; + explicit iterator(MCFragment *F) : F(F) {} + MCFragment &operator*() const { return *F; } + bool operator==(const iterator &O) const { return F == O.F; } + bool operator!=(const iterator &O) const { return F != O.F; } + iterator &operator++() { + F = F->Next; + return *this; + } + iterator operator++(int) { return iterator(F->Next); } + }; - using const_iterator = FragmentListType::const_iterator; - using iterator = FragmentListType::iterator; + struct FragList { + MCFragment *Head = nullptr; + MCFragment *Tail = nullptr; + }; private: + // At parse time, this holds the fragment list of the current subsection. At + // layout time, this holds the concatenated fragment lists of all subsections. + FragList *CurFragList; MCSymbol *Begin; MCSymbol *End = nullptr; /// The alignment requirement of this section. @@ -92,11 +106,10 @@ class MCSection { MCDummyFragment DummyFragment; - FragmentListType Fragments; - - /// Mapping from subsection number to insertion point for subsection numbers - /// below that number. - SmallVector, 1> SubsectionFragmentMap; + // Mapping from subsection number to fragment list. At layout time, the + // subsection 0 list is replaced with concatenated fragments from all + // subsections. + SmallVector, 1> Subsections; /// State for tracking labels that don't yet have Fragments struct PendingLabel { @@ -171,29 +184,27 @@ class MCSection { bool isRegistered() const { return IsRegistered; } void setIsRegistered(bool Value) { IsRegistered = Value; } - MCSection::FragmentListType &getFragmentList() { return Fragments; } - const MCSection::FragmentListType &getFragmentList() const { - return const_cast(this)->getFragmentList(); - } - - /// Support for MCFragment::getNextNode(). 
- static FragmentListType MCSection::*getSublistAccess(MCFragment *) { - return &MCSection::Fragments; - } - const MCDummyFragment &getDummyFragment() const { return DummyFragment; } MCDummyFragment &getDummyFragment() { return DummyFragment; } - iterator begin() { return Fragments.begin(); } - const_iterator begin() const { return Fragments.begin(); } - - iterator end() { return Fragments.end(); } - const_iterator end() const { return Fragments.end(); } - bool empty() const { return Fragments.empty(); } - - void addFragment(MCFragment &F) { Fragments.push_back(&F); } + FragList *curFragList() const { return CurFragList; } + iterator begin() const { return iterator(CurFragList->Head); } + iterator end() const { return {}; } + bool empty() const { return !CurFragList->Head; } + + void addFragment(MCFragment &F) { + // The formal layout order will be finalized in MCAssembler::layout. + if (CurFragList->Tail) { + CurFragList->Tail->Next = &F; + F.setLayoutOrder(CurFragList->Tail->getLayoutOrder() + 1); + } else { + CurFragList->Head = &F; + assert(F.getLayoutOrder() == 0); + } + CurFragList->Tail = &F; + } - MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection); + void switchSubsection(unsigned Subsection); void dump() const; diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 8490853eda87c2..4ff606d3732388 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -831,6 +831,19 @@ void MCAssembler::layout(MCAsmLayout &Layout) { MCSection *Sec = Layout.getSectionOrder()[i]; Sec->setLayoutOrder(i); + // Chain together fragments from all subsections. + MCDummyFragment Dummy(Sec); + MCFragment *Tail = &Dummy; + for (auto &[_, List] : Sec->Subsections) { + if (!List.Head) + continue; + Tail->Next = List.Head; + Tail = List.Tail; + } + Sec->Subsections.clear(); + Sec->Subsections.push_back({0u, {Dummy.getNext(), Tail}}); + Sec->CurFragList = &Sec->Subsections[0].second; + unsigned FragmentIndex = 0; for (MCFragment &Frag : *Sec) Frag.setLayoutOrder(FragmentIndex++); @@ -1094,7 +1107,7 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, uint64_t AlignedOffset = Layout.getFragmentOffset(&BF); uint64_t AlignedSize = 0; - for (const MCFragment *F = BF.getNextNode();; F = F->getNextNode()) { + for (const MCFragment *F = BF.getNext();; F = F->getNext()) { AlignedSize += computeFragmentSize(Layout, *F); if (F == BF.getLastFragment()) break; diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index b70ac86c18ccf1..2eecdb82d30bb0 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -661,25 +661,16 @@ static void AttemptToFoldSymbolOffsetDifference( // this is important when the Subtarget is changed and a new MCDataFragment // is created in the case of foo: instr; .arch_extension ext; instr .if . - // foo. - if (SA.isVariable() || SB.isVariable() || - FA->getSubsectionNumber() != FB->getSubsectionNumber()) + if (SA.isVariable() || SB.isVariable()) return; // Try to find a constant displacement from FA to FB, add the displacement // between the offset in FA of SA and the offset in FB of SB. bool Reverse = false; - if (FA == FB) { + if (FA == FB) Reverse = SA.getOffset() < SB.getOffset(); - } else if (!isa(FA)) { - // Testing FA < FB is slow. Use setLayoutOrder to speed up computation. - // The formal layout order will be finalized in MCAssembler::layout. 
- if (FA->getLayoutOrder() == 0 || FB->getLayoutOrder()== 0) { - unsigned LayoutOrder = 0; - for (MCFragment &F : *FA->getParent()) - F.setLayoutOrder(++LayoutOrder); - } + else if (!isa(FA)) Reverse = FA->getLayoutOrder() < FB->getLayoutOrder(); - } uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset(); int64_t Displacement = SA.getOffset() - SB.getOffset(); @@ -695,7 +686,7 @@ static void AttemptToFoldSymbolOffsetDifference( // instruction, the difference cannot be resolved as it may be changed by // the linker. bool BBeforeRelax = false, AAfterRelax = false; - for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) { + for (auto FI = FB; FI; FI = FI->getNext()) { auto DF = dyn_cast(FI); if (DF && DF->isLinkerRelaxable()) { if (&*FI != FB || SBOffset != DF->getContents().size()) @@ -726,12 +717,14 @@ static void AttemptToFoldSymbolOffsetDifference( return; } } - // If the previous loop does not find FA, FA must be a dummy fragment not in - // the fragment list (which means SA is a pending label (see - // flushPendingLabels)). In either case, we can resolve the difference. - assert(Found || isa(FA)); - Addend += Reverse ? -Displacement : Displacement; - FinalizeFolding(); + // If FA and FB belong to the same subsection, either the previous loop + // found FA, or FA is a dummy fragment not in the fragment list (which means + // SA is a pending label (see flushPendingLabels)) or FA and FB belong to + // different subsections. In either case, we can resolve the difference. + if (Found || isa(FA)) { + Addend += Reverse ? -Displacement : Displacement; + FinalizeFolding(); + } } } diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 84a587164c788d..6d97e8ce552baf 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -141,7 +141,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const { // The size is the last fragment's end offset. 
- const MCFragment &F = Sec->getFragmentList().back(); + const MCFragment &F = *Sec->curFragList()->Tail; return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F); } @@ -197,8 +197,6 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, /* *** */ -void ilist_alloc_traits::deleteNode(MCFragment *V) { V->destroy(); } - MCFragment::MCFragment(FragmentType Kind, bool HasInstructions, MCSection *Parent) : Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)), LayoutOrder(0), diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index ae4e6915fa294c..bf1ce76cdc14bd 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -180,7 +180,6 @@ void MCObjectStreamer::reset() { if (getContext().getTargetOptions()) Assembler->setRelaxAll(getContext().getTargetOptions()->MCRelaxAll); } - CurInsertionPoint = MCSection::iterator(); EmitEHFrame = true; EmitDebugFrame = false; PendingLabels.clear(); @@ -200,12 +199,7 @@ void MCObjectStreamer::emitFrames(MCAsmBackend *MAB) { } MCFragment *MCObjectStreamer::getCurrentFragment() const { - assert(getCurrentSectionOnly() && "No current section!"); - - if (CurInsertionPoint != getCurrentSectionOnly()->begin()) - return &*std::prev(CurInsertionPoint); - - return nullptr; + return getCurrentSectionOnly()->curFragList()->Tail; } static bool canReuseDataFragment(const MCDataFragment &F, @@ -391,8 +385,7 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section, } CurSubsectionIdx = unsigned(IntSubsection); - CurInsertionPoint = - Section->getSubsectionInsertionPoint(CurSubsectionIdx); + Section->switchSubsection(CurSubsectionIdx); return Created; } diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 9848d7fafe764a..1d9fe2cafd6174 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -24,7 +24,10 @@ MCSection::MCSection(SectionVariant V, StringRef Name, SectionKind K, MCSymbol *Begin) : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false), HasLayout(false), IsRegistered(false), DummyFragment(this), Name(Name), - Variant(V), Kind(K) {} + Variant(V), Kind(K) { + // The initial subsection number is 0. Create a fragment list. 
+ CurFragList = &Subsections.emplace_back(0u, FragList{}).second; +} MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { if (!End) @@ -34,7 +37,14 @@ MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { bool MCSection::hasEnded() const { return End && End->isInSection(); } -MCSection::~MCSection() = default; +MCSection::~MCSection() { + for (auto &[_, Chain] : Subsections) { + for (MCFragment *X = Chain.Head, *Y; X; X = Y) { + Y = X->Next; + X->destroy(); + } + } +} void MCSection::setBundleLockState(BundleLockStateType NewState) { if (NewState == NotBundleLocked) { @@ -55,35 +65,15 @@ void MCSection::setBundleLockState(BundleLockStateType NewState) { ++BundleLockNestingDepth; } -MCSection::iterator -MCSection::getSubsectionInsertionPoint(unsigned Subsection) { - if (Subsection == 0 && SubsectionFragmentMap.empty()) - return end(); - - SmallVectorImpl>::iterator MI = lower_bound( - SubsectionFragmentMap, std::make_pair(Subsection, (MCFragment *)nullptr)); - bool ExactMatch = false; - if (MI != SubsectionFragmentMap.end()) { - ExactMatch = MI->first == Subsection; - if (ExactMatch) - ++MI; - } - iterator IP; - if (MI == SubsectionFragmentMap.end()) - IP = end(); - else - IP = MI->second->getIterator(); - if (!ExactMatch && Subsection != 0) { - // The GNU as documentation claims that subsections have an alignment of 4, - // although this appears not to be the case. - MCFragment *F = new MCDataFragment(); - SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F)); - getFragmentList().insert(IP, F); - F->setParent(this); - F->setSubsectionNumber(Subsection); - } - - return IP; +void MCSection::switchSubsection(unsigned Subsection) { + size_t I = 0, E = Subsections.size(); + while (I != E && Subsections[I].first < Subsection) + ++I; + // If the subsection number is not in the sorted Subsections list, create a + // new fragment list. + if (I == E || Subsections[I].first != Subsection) + Subsections.insert(Subsections.begin() + I, {Subsection, FragList{}}); + CurFragList = &Subsections[I].second; } StringRef MCSection::getVirtualSectionKind() const { return "virtual"; } @@ -111,13 +101,11 @@ void MCSection::flushPendingLabels() { // creating new empty data fragments for each Subsection with labels pending. while (!PendingLabels.empty()) { PendingLabel& Label = PendingLabels[0]; - iterator CurInsertionPoint = - this->getSubsectionInsertionPoint(Label.Subsection); - const MCSymbol *Atom = nullptr; - if (CurInsertionPoint != begin()) - Atom = std::prev(CurInsertionPoint)->getAtom(); + switchSubsection(Label.Subsection); + const MCSymbol *Atom = + CurFragList->Tail ? CurFragList->Tail->getAtom() : nullptr; MCFragment *F = new MCDataFragment(); - getFragmentList().insert(CurInsertionPoint, F); + addFragment(*F); F->setParent(this); F->setAtom(Atom); flushPendingLabels(F, 0, Label.Subsection); diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 451269608f1799..522e268156aa3c 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1864,15 +1864,14 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); - IT = std::next(IT); - const MCFragment &AlignFrag = *IT; + const MCFragment &AlignFrag = *EmptyFrag.getNext(); if (AlignFrag.getKind() != MCFragment::FT_Align) report_fatal_error(".init_array section should be aligned"); if (cast(AlignFrag).getAlignment() != Align(is64Bit() ? 
8 : 4)) report_fatal_error(".init_array section should be aligned for pointers"); - const MCFragment &Frag = *std::next(IT); + const MCFragment &Frag = *AlignFrag.getNext(); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .init_array section"); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 3b6ea81cdf10ed..54efe4bc25efe7 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -712,17 +712,20 @@ class HexagonAsmBackend : public MCAsmBackend { void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override { + SmallVector Frags; for (auto *I : Layout.getSectionOrder()) { - for (auto &J : *I) { - switch (J.getKind()) { + Frags.clear(); + for (MCFragment &F : *I) + Frags.push_back(&F); + for (size_t J = 0, E = Frags.size(); J != E; ++J) { + switch (Frags[J]->getKind()) { default: break; case MCFragment::FT_Align: { - auto Size = Asm.computeFragmentSize(Layout, J); - for (auto K = J.getIterator(); - K != I->begin() && Size >= HEXAGON_PACKET_SIZE;) { + auto Size = Asm.computeFragmentSize(Layout, *Frags[J]); + for (auto K = J; K != 0 && Size >= HEXAGON_PACKET_SIZE;) { --K; - switch (K->getKind()) { + switch (Frags[K]->getKind()) { default: break; case MCFragment::FT_Align: { @@ -732,7 +735,7 @@ class HexagonAsmBackend : public MCAsmBackend { } case MCFragment::FT_Relaxable: { MCContext &Context = Asm.getContext(); - auto &RF = cast(*K); + auto &RF = cast(*Frags[K]); auto &Inst = const_cast(RF.getInst()); while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index b8e0f3a867f402..d83dadd3016193 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -62,7 +62,7 @@ const MCFixup *RISCVMCExpr::getPCRelHiFixup(const MCFragment **DFOut) const { uint64_t Offset = AUIPCSymbol->getOffset(); if (DF->getContents().size() == Offset) { - DF = dyn_cast_or_null(DF->getNextNode()); + DF = dyn_cast_or_null(DF->getNext()); if (!DF) return nullptr; Offset = 0; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index bc2eb6dcd541c7..1b8462f2d258ca 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -530,7 +530,7 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, if (!canPadInst(Inst, OS)) return; - if (PendingBA && PendingBA->getNextNode() == OS.getCurrentFragment()) { + if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) { // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. // @@ -978,8 +978,8 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm, // The layout is done. Mark every fragment as valid. 
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { MCSection &Section = *Layout.getSectionOrder()[i]; - Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); - Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); + Layout.getFragmentOffset(&*Section.curFragList()->Tail); + Asm.computeFragmentSize(Layout, *Section.curFragList()->Tail); } } From 2e007b89c65eeb33baf1b40103284d8937700cf0 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Tue, 11 Jun 2024 09:28:10 -0700 Subject: [PATCH 09/38] [lldb] Skip TestAttachDenied under asan Like many other tests, this one times out when run under the address sanitizer. To reduce noise, this commit skips it in those builds. --- .../commands/process/attach/attach_denied/TestAttachDenied.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py index 22dca62045022e..d72a710e8127bf 100644 --- a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py +++ b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py @@ -18,6 +18,7 @@ class AttachDeniedTestCase(TestBase): @skipIfWindows @skipIfiOSSimulator @skipIfDarwinEmbedded # ptrace(ATTACH_REQUEST...) won't work on ios/tvos/etc + @skipIfAsan # Times out inconsistently under asan def test_attach_to_process_by_id_denied(self): """Test attach by process id denied""" self.build() From 5ccdce95b4842a472ee466e4ad1bdfa4e9eb31dc Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 11 Jun 2024 17:34:15 +0100 Subject: [PATCH 10/38] [test] Skip some tests on Windows only (#95095) These tests pass on Linux using lit's internal shell. --- llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test | 2 +- llvm/test/tools/llvm-cov/gcov/basic.test | 2 +- llvm/test/tools/llvm-rc/windres-preproc.test | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test b/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test index 43d5a2c818fac3..c0d6f83e4af0d5 100644 --- a/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test +++ b/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test @@ -1,4 +1,4 @@ -# REQUIRES: shell +# UNSUPPORTED: system-windows # Ensures that .debuglink can fallback to a separate location. This is normally # /usr/lib/debug (or /usr/libdata/debug for NetBSD), but can be configured on # the command line (mainly for testing). diff --git a/llvm/test/tools/llvm-cov/gcov/basic.test b/llvm/test/tools/llvm-cov/gcov/basic.test index 5313fe2d7a69a1..7557739add8ba5 100644 --- a/llvm/test/tools/llvm-cov/gcov/basic.test +++ b/llvm/test/tools/llvm-cov/gcov/basic.test @@ -3,7 +3,7 @@ # Test fails on Windows where internal shell is used due to path separator # mismatches. -REQUIRES: shell +UNSUPPORTED: system-windows RUN: rm -rf %t RUN: mkdir %t diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test index 13f82299a074bb..52427862e760b8 100644 --- a/llvm/test/tools/llvm-rc/windres-preproc.test +++ b/llvm/test/tools/llvm-rc/windres-preproc.test @@ -1,7 +1,6 @@ ;; Some quoted arguments below don't work properly on Windows when llvm-lit -;; invokes the cmd shell to run the commands. Just require running in a -;; posix shell, to keep being able to test this corner case on Unix at least. -; REQUIRES: shell +;; invokes the cmd shell to run the commands. 
+; UNSUPPORTED: system-windows ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1 From a03e93e1b2172791085f3f8c293b8e5d6ed8d841 Mon Sep 17 00:00:00 2001 From: David Parks Date: Tue, 11 Jun 2024 10:40:00 -0600 Subject: [PATCH 11/38] [flang] Add runtime support for Fortran intrinsic ERFC_SCALED (#95040) Co-authored-by: David Parks --- .../flang/Optimizer/Builder/IntrinsicCall.h | 2 + .../flang/Optimizer/Builder/Runtime/Numeric.h | 4 + flang/include/flang/Runtime/numeric.h | 14 +++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 11 ++ .../lib/Optimizer/Builder/Runtime/Numeric.cpp | 46 ++++++++ flang/runtime/numeric-templates.h | 104 ++++++++++++++++++ flang/runtime/numeric.cpp | 21 ++++ flang/test/Lower/Intrinsics/erfc_scaled.f90 | 23 ++++ flang/unittests/Runtime/Numeric.cpp | 8 ++ 9 files changed, 233 insertions(+) create mode 100644 flang/test/Lower/Intrinsics/erfc_scaled.f90 diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 52f2034b8707a3..ec1fb411ff0e25 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -204,6 +204,8 @@ struct IntrinsicLibrary { llvm::ArrayRef); fir::ExtendedValue genCAssociatedCPtr(mlir::Type, llvm::ArrayRef); + mlir::Value genErfcScaled(mlir::Type resultType, + llvm::ArrayRef args); void genCFPointer(llvm::ArrayRef); void genCFProcPointer(llvm::ArrayRef); fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h index 558358257b5134..6857650ce52b74 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h @@ -18,6 +18,10 @@ class FirOpBuilder; namespace fir::runtime { +/// Generate call to ErfcScaled intrinsic runtime routine. +mlir::Value genErfcScaled(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value x); + /// Generate call to Exponent intrinsic runtime routine. 
mlir::Value genExponent(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type resultType, mlir::Value x); diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h index 7d3f91360c8cfb..e051e864316630 100644 --- a/flang/include/flang/Runtime/numeric.h +++ b/flang/include/flang/Runtime/numeric.h @@ -73,6 +73,20 @@ CppTypeFor RTDECL(Ceiling16_16)( #endif #endif +// ERFC_SCALED +CppTypeFor RTDECL(ErfcScaled4)( + CppTypeFor); +CppTypeFor RTDECL(ErfcScaled8)( + CppTypeFor); +#if LDBL_MANT_DIG == 64 +CppTypeFor RTDECL(ErfcScaled10)( + CppTypeFor); +#endif +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +CppTypeFor RTDECL(ErfcScaled16)( + CppTypeFor); +#endif + // EXPONENT is defined to return default INTEGER; support INTEGER(4 & 8) CppTypeFor RTDECL(Exponent4_4)( CppTypeFor); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 4cdf1f2d98caa4..4317806561693c 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -224,6 +224,7 @@ static constexpr IntrinsicHandler handlers[]{ {"boundary", asBox, handleDynamicOptional}, {"dim", asValue}}}, /*isElemental=*/false}, + {"erfc_scaled", &I::genErfcScaled}, {"etime", &I::genEtime, {{{"values", asBox}, {"time", asBox}}}, @@ -5878,6 +5879,16 @@ mlir::Value IntrinsicLibrary::genRRSpacing(mlir::Type resultType, fir::runtime::genRRSpacing(builder, loc, fir::getBase(args[0]))); } +// ERFC_SCALED +mlir::Value IntrinsicLibrary::genErfcScaled(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 1); + + return builder.createConvert( + loc, resultType, + fir::runtime::genErfcScaled(builder, loc, fir::getBase(args[0]))); +} + // SAME_TYPE_AS fir::ExtendedValue IntrinsicLibrary::genSameTypeAs(mlir::Type resultType, diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp index 8ac9d64f576b6a..1d13248db59841 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp @@ -22,6 +22,28 @@ using namespace Fortran::runtime; // may not have them in their runtime library. This can occur in the // case of cross compilation, for example. +/// Placeholder for real*10 version of ErfcScaled Intrinsic +struct ForcedErfcScaled10 { + static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled10)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF80(ctx); + return mlir::FunctionType::get(ctx, {ty}, {ty}); + }; + } +}; + +/// Placeholder for real*16 version of ErfcScaled Intrinsic +struct ForcedErfcScaled16 { + static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled16)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF128(ctx); + return mlir::FunctionType::get(ctx, {ty}, {ty}); + }; + } +}; + /// Placeholder for real*10 version of Exponent Intrinsic struct ForcedExponent10_4 { static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Exponent10_4)); @@ -444,6 +466,30 @@ mlir::Value fir::runtime::genRRSpacing(fir::FirOpBuilder &builder, return builder.create(loc, func, args).getResult(0); } +/// Generate call to ErfcScaled intrinsic runtime routine. 
+mlir::Value fir::runtime::genErfcScaled(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value x) { + mlir::func::FuncOp func; + mlir::Type fltTy = x.getType(); + + if (fltTy.isF32()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF64()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF80()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF128()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else + fir::intrinsicTypeTODO(builder, fltTy, loc, "ERFC_SCALED"); + + auto funcTy = func.getFunctionType(); + llvm::SmallVector args = { + builder.createConvert(loc, funcTy.getInput(0), x)}; + + return builder.create(loc, func, args).getResult(0); +} + /// Generate call to Scale intrinsic runtime routine. mlir::Value fir::runtime::genScale(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value x, diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h index 4936e7738a663e..1b5395df945193 100644 --- a/flang/runtime/numeric-templates.h +++ b/flang/runtime/numeric-templates.h @@ -354,6 +354,110 @@ template inline RT_API_ATTRS T Spacing(T x) { } } +// ERFC_SCALED (16.9.71) +template inline RT_API_ATTRS T ErfcScaled(T arg) { + // Coefficients for approximation to erfc in the first interval. + static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02, + 3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1}; + static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02, + 1.28261652607737228e03, 2.84423683343917062e03}; + + // Coefficients for approximation to erfc in the second interval. + static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00, + 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, + 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03, + 2.15311535474403846e-8}; + static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02, + 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, + 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03}; + + // Coefficients for approximation to erfc in the third interval. 
+ static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1, + 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4, + 1.63153871373020978e-2}; + static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00, + 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3}; + + constexpr T sqrtpi{1.7724538509078120380404576221783883301349L}; + constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L}; + constexpr T epsilonby2{std::numeric_limits::epsilon() * 0.5}; + constexpr T xneg{-26.628e0}; + constexpr T xhuge{6.71e7}; + constexpr T thresh{0.46875e0}; + constexpr T zero{0.0}; + constexpr T one{1.0}; + constexpr T four{4.0}; + constexpr T sixteen{16.0}; + constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits::min())}; + static_assert(xmax > xhuge, "xmax must be greater than xhuge"); + + T ysq; + T xnum; + T xden; + T del; + T result; + + auto x{arg}; + auto y{std::fabs(x)}; + + if (y <= thresh) { + // evaluate erf for |x| <= 0.46875 + ysq = zero; + if (y > epsilonby2) { + ysq = y * y; + } + xnum = a[4] * ysq; + xden = ysq; + for (int i{0}; i < 3; i++) { + xnum = (xnum + a[i]) * ysq; + xden = (xden + b[i]) * ysq; + } + result = x * (xnum + a[3]) / (xden + b[3]); + result = one - result; + result = std::exp(ysq) * result; + return result; + } else if (y <= four) { + // evaluate erfc for 0.46875 < |x| <= 4.0 + xnum = c[8] * y; + xden = y; + for (int i{0}; i < 7; ++i) { + xnum = (xnum + c[i]) * y; + xden = (xden + d[i]) * y; + } + result = (xnum + c[7]) / (xden + d[7]); + } else { + // evaluate erfc for |x| > 4.0 + result = zero; + if (y >= xhuge) { + if (y < xmax) { + result = rsqrtpi / y; + } + } else { + ysq = one / (y * y); + xnum = p[5] * ysq; + xden = ysq; + for (int i{0}; i < 4; ++i) { + xnum = (xnum + p[i]) * ysq; + xden = (xden + q[i]) * ysq; + } + result = ysq * (xnum + p[4]) / (xden + q[4]); + result = (rsqrtpi - result) / y; + } + } + // fix up for negative argument, erf, etc. + if (x < zero) { + if (x < xneg) { + result = std::numeric_limits::max(); + } else { + ysq = trunc(x * sixteen) / sixteen; + del = (x - ysq) * (x + ysq); + y = std::exp((ysq * ysq)) * std::exp((del)); + result = (y + y) - result; + } + } + return result; +} + } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_ diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp index 2225473c4690e2..7c40beb31083ff 100644 --- a/flang/runtime/numeric.cpp +++ b/flang/runtime/numeric.cpp @@ -316,6 +316,27 @@ CppTypeFor RTDEF(Ceiling16_16)( #endif #endif +CppTypeFor RTDEF(ErfcScaled4)( + CppTypeFor x) { + return ErfcScaled(x); +} +CppTypeFor RTDEF(ErfcScaled8)( + CppTypeFor x) { + return ErfcScaled(x); +} +#if LDBL_MANT_DIG == 64 +CppTypeFor RTDEF(ErfcScaled10)( + CppTypeFor x) { + return ErfcScaled(x); +} +#endif +#if LDBL_MANT_DIG == 113 +CppTypeFor RTDEF(ErfcScaled16)( + CppTypeFor x) { + return ErfcScaled(x); +} +#endif + CppTypeFor RTDEF(Exponent4_4)( CppTypeFor x) { return Exponent>(x); diff --git a/flang/test/Lower/Intrinsics/erfc_scaled.f90 b/flang/test/Lower/Intrinsics/erfc_scaled.f90 new file mode 100644 index 00000000000000..ab5e90cb2409ea --- /dev/null +++ b/flang/test/Lower/Intrinsics/erfc_scaled.f90 @@ -0,0 +1,23 @@ +! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s + +! CHECK-LABEL: func @_QPerfc_scaled4( +! 
CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f32>{{.*}}) -> f32
+function erfc_scaled4(x)
+  real(kind=4) :: erfc_scaled4
+  real(kind=4) :: x
+  erfc_scaled4 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f32>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled4(%[[a1]]) {{.*}}: (f32) -> f32
+end function erfc_scaled4
+
+
+! CHECK-LABEL: func @_QPerfc_scaled8(
+! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f64>{{.*}}) -> f64
+function erfc_scaled8(x)
+  real(kind=8) :: erfc_scaled8
+  real(kind=8) :: x
+  erfc_scaled8 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f64>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled8(%[[a1]]) {{.*}}: (f64) -> f64
+end function erfc_scaled8
diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp
index b69ff21ea79fb0..9f77e165707834 100644
--- a/flang/unittests/Runtime/Numeric.cpp
+++ b/flang/unittests/Runtime/Numeric.cpp
@@ -31,6 +31,14 @@ TEST(Numeric, Floor) {
   EXPECT_EQ(RTNAME(Floor4_1)(Real<4>{0}), 0);
 }
 
+TEST(Numeric, Erfc_scaled) {
+  EXPECT_NEAR(RTNAME(ErfcScaled4)(Real<4>{20.0}), 0.02817434874, 1.0e-8);
+  EXPECT_NEAR(RTNAME(ErfcScaled8)(Real<8>{20.0}), 0.02817434874, 1.0e-11);
+#if LDBL_MANT_DIG == 64
+  EXPECT_NEAR(RTNAME(ErfcScaled10)(Real<10>{20.0}), 0.02817434874, 1.0e-8);
+#endif
+}
+
 TEST(Numeric, Exponent) {
   EXPECT_EQ(RTNAME(Exponent4_4)(Real<4>{0}), 0);
   EXPECT_EQ(RTNAME(Exponent4_8)(Real<4>{1.0}), 1);

From 71497cc7a4695d22fc5dfd64958744816c15a19e Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Tue, 11 Jun 2024 10:50:13 -0600
Subject: [PATCH 12/38] [CGSCC] Fix compile time blowup with large RefSCCs (#94815)

In some modules, e.g. Kotlin-generated IR, we end up with a huge RefSCC,
and the call graph updates triggered by the inliner take a long time.
This is due to RefSCC::removeInternalRefEdges() getting called many
times, each time removing one function from the RefSCC, while each call
to removeInternalRefEdges() is proportional to the size of the RefSCC.

There are two places that call removeInternalRefEdges(), in
updateCGAndAnalysisManagerForPass() and
LazyCallGraph::removeDeadFunction().

1) Since LazyCallGraph can deal with spurious ref edges (edges that
exist in the graph but not in the IR), we can simply not call
removeInternalRefEdges() in updateCGAndAnalysisManagerForPass().

2) With the above change, LazyCallGraph::removeDeadFunction() still
ends up taking the brunt of compile time, for the original reason. So
instead we batch all the dead function removals so that we can call
removeInternalRefEdges() just once. This requires some changes to
callers of removeDeadFunction() to not actually erase the function from
the module, but to defer that to when we batch delete dead functions at
the end of the CGSCC run, leaving the function body as "unreachable" in
the meantime. We still need to ensure that call edges are accurate.

I had also tried deleting dead functions after visiting a RefSCC, but
deleting them all at once at the end was simpler.

Many test changes are due to not performing unnecessary revisits of an
SCC because we don't remove some ref edges (the CGSCC infrastructure
deems ref edge refinements unimportant when it comes to revisiting
SCCs, although that seems not to be consistently true given these
changes).

Specifically for devirt-invalidated.ll, this seems to expose an
inlining order issue with the inliner. Probably unimportant for this
type of intentionally weird call graph.
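The shape of the batching fix, as a schematic sketch (simplified from
the removeDeadFunctions() implementation in the diff below; Node and
RefSCC stand in for the real LazyCallGraph types):

    #include <unordered_map>
    #include <vector>
    struct Node;
    struct RefSCC;
    // Group dead nodes by containing RefSCC so each RefSCC pays the
    // O(|RefSCC|) removeInternalRefEdges() cost once, not once per node.
    void removeDead(const std::vector<std::pair<RefSCC *, Node *>> &Dead) {
      std::unordered_map<RefSCC *, std::vector<Node *>> Groups;
      for (auto &[RC, N] : Dead)
        Groups[RC].push_back(N);
      // for each (RC, Nodes): one batched ref-edge removal pass over RC
    }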
Compile time: https://llvm-compile-time-tracker.com/compare.php?from=6f2c61071c274a1b5e212e6ad4114641ec7c7fc3&to=b08c90d05e290dd065755ea776ceaf1420680224&stat=instructions:u --- llvm/include/llvm/Analysis/CGSCCPassManager.h | 4 + llvm/include/llvm/Analysis/LazyCallGraph.h | 22 ++-- llvm/lib/Analysis/CGSCCPassManager.cpp | 42 ++---- llvm/lib/Analysis/LazyCallGraph.cpp | 123 +++++++++--------- llvm/lib/Transforms/IPO/Inliner.cpp | 29 ++--- .../lib/Transforms/Utils/CallGraphUpdater.cpp | 12 +- .../test/Other/cgscc-refscc-mutation-order.ll | 2 - llvm/test/Other/devirt-invalidated.ll | 2 - .../live_called_from_dead.ll | 1 - .../live_called_from_dead_2.ll | 1 - .../Transforms/Inline/cgscc-cycle-debug.ll | 1 - .../Analysis/CGSCCPassManagerTest.cpp | 12 +- llvm/unittests/Analysis/LazyCallGraphTest.cpp | 29 +++-- 13 files changed, 132 insertions(+), 148 deletions(-) diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index 5654ad46d6eab0..b19d53621ac867 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -306,6 +306,10 @@ struct CGSCCUpdateResult { SmallDenseSet, 4> &InlinedInternalEdges; + /// Functions that a pass has considered to be dead to be removed at the end + /// of the call graph walk in batch. + SmallVector &DeadFunctions; + /// Weak VHs to keep track of indirect calls for the purposes of detecting /// devirtualization. /// diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ac8ca207d312b7..a8bbf2c578af9b 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -832,7 +832,7 @@ class LazyCallGraph { /// self-edges and edge removals which result in a spanning tree with no /// more cycles. [[nodiscard]] SmallVector - removeInternalRefEdge(Node &SourceN, ArrayRef TargetNs); + removeInternalRefEdges(ArrayRef> Edges); /// A convenience wrapper around the above to handle trivial cases of /// inserting a new call edge. @@ -1056,18 +1056,18 @@ class LazyCallGraph { /// once SCCs have started to be formed. These routines have strict contracts /// but may be called at any point. - /// Remove a dead function from the call graph (typically to delete it). + /// Remove dead functions from the call graph. /// - /// Note that the function must have an empty use list, and the call graph - /// must be up-to-date prior to calling this. That means it is by itself in - /// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural - /// changes result from calling this routine other than potentially removing - /// entry points into the call graph. + /// These functions should have already been passed to markDeadFunction(). + /// This is done as a batch to prevent compile time blowup as a result of + /// handling a single function at a time. + void removeDeadFunctions(ArrayRef DeadFs); + + /// Mark a function as dead to be removed later by removeDeadFunctions(). /// - /// If SCC formation has begun, this function must not be part of the current - /// DFS in order to call this safely. Typically, the function will have been - /// fully visited by the DFS prior to calling this routine. - void removeDeadFunction(Function &F); + /// The function body should have no incoming or outgoing call or ref edges. + /// For example, a function with a single "unreachable" instruction. + void markDeadFunction(Function &F); /// Add a new function split/outlined from an existing function. 
/// diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index 8ae5c3dee6103e..2ed1d98f800688 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -158,10 +158,12 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { SmallDenseSet, 4> InlinedInternalEdges; + SmallVector DeadFunctions; + CGSCCUpdateResult UR = { - RCWorklist, CWorklist, InvalidRefSCCSet, - InvalidSCCSet, nullptr, PreservedAnalyses::all(), - InlinedInternalEdges, {}}; + RCWorklist, CWorklist, InvalidRefSCCSet, + InvalidSCCSet, nullptr, PreservedAnalyses::all(), + InlinedInternalEdges, DeadFunctions, {}}; // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. @@ -340,6 +342,10 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { } while (!RCWorklist.empty()); } + CG.removeDeadFunctions(DeadFunctions); + for (Function *DeadF : DeadFunctions) + DeadF->eraseFromParent(); + #if defined(EXPENSIVE_CHECKS) // Verify that the call graph is still valid. CG.verify(); @@ -1030,36 +1036,6 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( return true; }); - // Now do a batch removal of the internal ref edges left. - auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets); - if (!NewRefSCCs.empty()) { - // The old RefSCC is dead, mark it as such. - UR.InvalidatedRefSCCs.insert(RC); - - // Note that we don't bother to invalidate analyses as ref-edge - // connectivity is not really observable in any way and is intended - // exclusively to be used for ordering of transforms rather than for - // analysis conclusions. - - // Update RC to the "bottom". - assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!"); - RC = &C->getOuterRefSCC(); - assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!"); - - // The RC worklist is in reverse postorder, so we enqueue the new ones in - // RPO except for the one which contains the source node as that is the - // "bottom" we will continue processing in the bottom-up walk. - assert(NewRefSCCs.front() == RC && - "New current RefSCC not first in the returned list!"); - for (RefSCC *NewRC : llvm::reverse(llvm::drop_begin(NewRefSCCs))) { - assert(NewRC != RC && "Should not encounter the current RefSCC further " - "in the postorder list of new RefSCCs."); - UR.RCWorklist.insert(NewRC); - LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: " - << *NewRC << "\n"); - } - } - // Next demote all the call edges that are now ref edges. This helps make // the SCCs small which should minimize the work below as we don't want to // form cycles that this would break. diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index 48a7ca0061600b..e6bf8c9cbb289f 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -1160,8 +1160,8 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { } SmallVector -LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, - ArrayRef TargetNs) { +LazyCallGraph::RefSCC::removeInternalRefEdges( + ArrayRef> Edges) { // We return a list of the resulting *new* RefSCCs in post-order. SmallVector Result; @@ -1179,25 +1179,21 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, #endif // First remove the actual edges. 
-  for (Node *TargetN : TargetNs) {
-    assert(!(*SourceN)[*TargetN].isCall() &&
+  for (auto [SourceN, TargetN] : Edges) {
+    assert(!(**SourceN)[*TargetN].isCall() &&
            "Cannot remove a call edge, it must first be made a ref edge");
 
-    bool Removed = SourceN->removeEdgeInternal(*TargetN);
+    bool Removed = (*SourceN)->removeEdgeInternal(*TargetN);
     (void)Removed;
     assert(Removed && "Target not in the edge set for this caller?");
   }
 
   // Direct self references don't impact the ref graph at all.
-  if (llvm::all_of(TargetNs,
-                   [&](Node *TargetN) { return &SourceN == TargetN; }))
-    return Result;
-
   // If all targets are in the same SCC as the source, because no call edges
   // were removed there is no RefSCC structure change.
-  SCC &SourceC = *G->lookupSCC(SourceN);
-  if (llvm::all_of(TargetNs, [&](Node *TargetN) {
-        return G->lookupSCC(*TargetN) == &SourceC;
+  if (llvm::all_of(Edges, [&](std::pair<Node *, Node *> E) {
+        return E.first == E.second ||
+               G->lookupSCC(*E.first) == G->lookupSCC(*E.second);
       }))
     return Result;
@@ -1499,7 +1495,7 @@ void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) {
   assert(Removed && "Target not in the edge set for this caller?");
 }
 
-void LazyCallGraph::removeDeadFunction(Function &F) {
+void LazyCallGraph::markDeadFunction(Function &F) {
   // FIXME: This is unnecessarily restrictive. We should be able to remove
   // functions which recursively call themselves.
   assert(F.hasZeroLiveUses() &&
@@ -1515,57 +1511,66 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
 
   Node &N = *NI->second;
 
-  // Cannot remove a function which has yet to be visited in the DFS walk, so
-  // if we have a node at all then we must have an SCC and RefSCC.
-  auto CI = SCCMap.find(&N);
-  assert(CI != SCCMap.end() &&
-         "Tried to remove a node without an SCC after DFS walk started!");
-  SCC &C = *CI->second;
-  RefSCC *RC = &C.getOuterRefSCC();
-
-  // In extremely rare cases, we can delete a dead function which is still in a
-  // non-trivial RefSCC. This can happen due to spurious ref edges sticking
-  // around after an IR function reference is removed.
-  if (RC->size() != 1) {
-    SmallVector<Node *> NodesInRC;
-    for (SCC &OtherC : *RC) {
-      for (Node &OtherN : OtherC)
-        NodesInRC.push_back(&OtherN);
+  // Remove all call edges out of dead function.
+  for (Edge E : *N) {
+    if (E.isCall())
+      N->setEdgeKind(E.getNode(), Edge::Ref);
+  }
+}
+
+void LazyCallGraph::removeDeadFunctions(ArrayRef<Function *> DeadFs) {
+  if (DeadFs.empty())
+    return;
+
+  // Group dead functions by the RefSCC they're in.
+  DenseMap<RefSCC *, SmallVector<Node *>> RCs;
+  for (Function *DeadF : DeadFs) {
+    Node *N = lookup(*DeadF);
+#ifndef NDEBUG
+    for (Edge &E : **N) {
+      assert(!E.isCall() &&
+             "dead function shouldn't have any outgoing call edges");
 }
-    for (Node *OtherN : NodesInRC) {
-      if ((*OtherN)->lookup(N)) {
-        auto NewRefSCCs =
-            RC->removeInternalRefEdge(*OtherN, ArrayRef(&N));
-        // If we've split into multiple RefSCCs, RC is now invalid and the
-        // RefSCC containing C will be different.
-        if (!NewRefSCCs.empty())
-          RC = &C.getOuterRefSCC();
+#endif
+    RefSCC *RC = lookupRefSCC(*N);
+    RCs[RC].push_back(N);
+  }
+  // Remove outgoing edges from all dead functions. Dead functions should
+  // already have had their call edges removed in markDeadFunction(), so we only
+  // need to worry about spurious ref edges.
+  for (auto [RC, DeadNs] : RCs) {
+    SmallVector<std::pair<Node *, Node *>> InternalEdgesToRemove;
+    for (Node *DeadN : DeadNs) {
+      for (Edge &E : **DeadN) {
+        if (lookupRefSCC(E.getNode()) == RC)
+          InternalEdgesToRemove.push_back({DeadN, &E.getNode()});
+        else
+          RC->removeOutgoingEdge(*DeadN, E.getNode());
       }
     }
+    // We ignore the returned RefSCCs since at this point we're done with CGSCC
+    // iteration and don't need to add it to any worklists.
+    (void)RC->removeInternalRefEdges(InternalEdgesToRemove);
+    for (Node *DeadN : DeadNs) {
+      RefSCC *DeadRC = lookupRefSCC(*DeadN);
+      assert(DeadRC->size() == 1);
+      assert(DeadRC->begin()->size() == 1);
+      DeadRC->clear();
+      DeadRC->G = nullptr;
+    }
   }
+  // Clean up data structures.
+  for (Function *DeadF : DeadFs) {
+    Node &N = *lookup(*DeadF);
+
+    EntryEdges.removeEdgeInternal(N);
+    SCCMap.erase(SCCMap.find(&N));
+    NodeMap.erase(NodeMap.find(DeadF));
 
-  NodeMap.erase(NI);
-  EntryEdges.removeEdgeInternal(N);
-  SCCMap.erase(CI);
-
-  // This node must be the only member of its SCC as it has no callers, and
-  // that SCC must be the only member of a RefSCC as it has no references.
-  // Validate these properties first.
-  assert(C.size() == 1 && "Dead functions must be in a singular SCC");
-  assert(RC->size() == 1 && "Dead functions must be in a singular RefSCC");
-
-  // Finally clear out all the data structures from the node down through the
-  // components. postorder_ref_scc_iterator will skip empty RefSCCs, so no need
-  // to adjust LazyCallGraph data structures.
-  N.clear();
-  N.G = nullptr;
-  N.F = nullptr;
-  C.clear();
-  RC->clear();
-  RC->G = nullptr;
-
-  // Nothing to delete as all the objects are allocated in stable bump pointer
-  // allocators.
+    N.clear();
+    N.G = nullptr;
+    N.F = nullptr;
+  }
 }
 
 // Gets the Edge::Kind from one function to another by looking at the function's
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index a9747aebf67bbc..1a7b9bc8e3e770 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -197,6 +197,14 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
   return *IAA->getAdvisor();
 }
 
+void makeFunctionBodyUnreachable(Function &F) {
+  F.dropAllReferences();
+  for (BasicBlock &BB : make_early_inc_range(F))
+    BB.eraseFromParent();
+  BasicBlock *BB = BasicBlock::Create(F.getContext(), "", &F);
+  new UnreachableInst(F.getContext(), BB);
+}
+
 PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                    CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                    CGSCCUpdateResult &UR) {
@@ -448,11 +456,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                  }),
                   Calls.end());
 
-        // Clear the body and queue the function itself for deletion when we
-        // finish inlining and call graph updates.
-        // Note that after this point, it is an error to do anything other
-        // than use the callee's address or delete it.
-        Callee.dropAllReferences();
+        // Clear the body and queue the function itself for call graph
+        // updating when we finish inlining.
+ makeFunctionBodyUnreachable(Callee); assert(!is_contained(DeadFunctions, &Callee) && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); @@ -530,7 +536,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (!DeadFunctionsInComdats.empty()) { filterDeadComdatFunctions(DeadFunctionsInComdats); for (auto *Callee : DeadFunctionsInComdats) - Callee->dropAllReferences(); + makeFunctionBodyUnreachable(*Callee); DeadFunctions.append(DeadFunctionsInComdats); } @@ -542,25 +548,18 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // that is OK as all we do is delete things and add pointers to unordered // sets. for (Function *DeadF : DeadFunctions) { + CG.markDeadFunction(*DeadF); // Get the necessary information out of the call graph and nuke the // function there. Also, clear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); - auto &DeadRC = DeadC.getOuterRefSCC(); - CG.removeDeadFunction(*DeadF); // Mark the relevant parts of the call graph as invalid so we don't visit // them. UR.InvalidatedSCCs.insert(&DeadC); - UR.InvalidatedRefSCCs.insert(&DeadRC); - - // If the updated SCC was the one containing the deleted function, clear it. - if (&DeadC == UR.UpdatedC) - UR.UpdatedC = nullptr; - // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + UR.DeadFunctions.push_back(DeadF); ++NumDeleted; } diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index d0b9884aa9099b..e9f37d4044cb02 100644 --- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -67,16 +67,20 @@ bool CallGraphUpdater::finalize() { FAM.clear(*DeadFn, DeadFn->getName()); AM->clear(*DeadSCC, DeadSCC->getName()); - LCG->removeDeadFunction(*DeadFn); + LCG->markDeadFunction(*DeadFn); // Mark the relevant parts of the call graph as invalid so we don't // visit them. UR->InvalidatedSCCs.insert(DeadSCC); UR->InvalidatedRefSCCs.insert(&DeadRC); + UR->DeadFunctions.push_back(DeadFn); + } else { + // The CGSCC infrastructure batch deletes functions at the end of the + // call graph walk, so only erase the function if we're not using that + // infrastructure. + // The function is now really dead and de-attached from everything. + DeadFn->eraseFromParent(); } - - // The function is now really dead and de-attached from everything. 
- DeadFn->eraseFromParent(); } } diff --git a/llvm/test/Other/cgscc-refscc-mutation-order.ll b/llvm/test/Other/cgscc-refscc-mutation-order.ll index 13a46503c1f4c9..aa207357157633 100644 --- a/llvm/test/Other/cgscc-refscc-mutation-order.ll +++ b/llvm/test/Other/cgscc-refscc-mutation-order.ll @@ -15,8 +15,6 @@ ; CHECK-NOT: InstCombinePass ; CHECK: Running pass: InstCombinePass on f4 ; CHECK-NOT: InstCombinePass -; CHECK: Running pass: InstCombinePass on f1 -; CHECK-NOT: InstCombinePass @a1 = alias void (), ptr @f1 @a2 = alias void (), ptr @f2 diff --git a/llvm/test/Other/devirt-invalidated.ll b/llvm/test/Other/devirt-invalidated.ll index c3ed5e53b3b04c..7926641dda97bb 100644 --- a/llvm/test/Other/devirt-invalidated.ll +++ b/llvm/test/Other/devirt-invalidated.ll @@ -1,8 +1,6 @@ ; RUN: opt -passes='devirt<0>(inline)' < %s -S | FileCheck %s -; CHECK-NOT: internal ; CHECK: define void @e() -; CHECK-NOT: internal define void @e() { entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 2df81d6cb1832d..1c34fff8dd7554 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -37,7 +37,6 @@ define internal i32 @caller(ptr %B) { ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index 7c28de24beea27..b42647840f7cfc 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -54,7 +54,6 @@ define internal i32 @caller(ptr %B) { ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll b/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll index 40a6b0577e7dd0..e79700e8dac624 100644 --- a/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll +++ b/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll @@ -13,7 +13,6 @@ ; CHECK: Running an SCC pass across the RefSCC: [(test1_a, test1_b, test1_c)] ; CHECK: Enqueuing the existing SCC in the worklist:(test1_b) ; CHECK: Enqueuing a newly formed SCC:(test1_c) -; CHECK: Enqueuing a new RefSCC in the update worklist: [(test1_b)] ; CHECK: Switch an internal ref edge to a call edge from 'test1_a' to 'test1_c' ; CHECK: Switch an internal ref edge to a call edge from 'test1_a' to 'test1_a' ; CHECK: Re-running SCC passes after a refinement of the current SCC: (test1_c, test1_a) diff --git a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp index b33567dd602b0e..aab148c12c4164 100644 --- a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp +++ 
b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
@@ -1659,18 +1659,16 @@ TEST_F(CGSCCPassManagerTest, TestUpdateCGAndAnalysisManagerForPasses9) {
         Function *FnF = M->getFunction("f");
 
         // Use the CallGraphUpdater to update the call graph.
-        {
-          CallGraphUpdater CGU;
-          CGU.initialize(CG, C, AM, UR);
-          ASSERT_NO_FATAL_FAILURE(CGU.removeFunction(*FnF));
-          ASSERT_EQ(M->getFunctionList().size(), 6U);
-        }
-        ASSERT_EQ(M->getFunctionList().size(), 5U);
+        CallGraphUpdater CGU;
+        CGU.initialize(CG, C, AM, UR);
+        ASSERT_NO_FATAL_FAILURE(CGU.removeFunction(*FnF));
+        ASSERT_EQ(M->getFunctionList().size(), 6U);
       }));
 
   ModulePassManager MPM;
   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
   MPM.run(*M, MAM);
+  ASSERT_EQ(M->getFunctionList().size(), 5U);
 }
 
 TEST_F(CGSCCPassManagerTest, TestUpdateCGAndAnalysisManagerForPasses10) {
diff --git a/llvm/unittests/Analysis/LazyCallGraphTest.cpp b/llvm/unittests/Analysis/LazyCallGraphTest.cpp
index 69af7d92c7cf0d..64b6ccddc53b0c 100644
--- a/llvm/unittests/Analysis/LazyCallGraphTest.cpp
+++ b/llvm/unittests/Analysis/LazyCallGraphTest.cpp
@@ -1169,7 +1169,7 @@ TEST(LazyCallGraphTest, InlineAndDeleteFunction) {
   LazyCallGraph::SCC &NewDC = *NewCs.begin();
   EXPECT_EQ(&NewDC, CG.lookupSCC(D1));
   EXPECT_EQ(&NewDC, CG.lookupSCC(D3));
-  auto NewRCs = DRC.removeInternalRefEdge(D1, {&D2});
+  auto NewRCs = DRC.removeInternalRefEdges({{&D1, &D2}});
   ASSERT_EQ(2u, NewRCs.size());
   LazyCallGraph::RefSCC &NewDRC = *NewRCs[0];
   EXPECT_EQ(&NewDRC, CG.lookupRefSCC(D1));
@@ -1186,7 +1186,8 @@ TEST(LazyCallGraphTest, InlineAndDeleteFunction) {
   EXPECT_TRUE(D2RC.isParentOf(NewDRC));
 
   // Now that we've updated the call graph, D2 is dead, so remove it.
-  CG.removeDeadFunction(D2F);
+  CG.markDeadFunction(D2F);
+  CG.removeDeadFunctions({&D2F});
 
   // Check that the graph still looks the same.
   EXPECT_EQ(&ARC, CG.lookupRefSCC(A1));
@@ -1344,7 +1345,7 @@ TEST(LazyCallGraphTest, InternalEdgeRemoval) {
   // Remove the edge from b -> a, which should leave the 3 functions still in
   // a single connected component because of a -> b -> c -> a.
   SmallVector<LazyCallGraph::RefSCC *, 1> NewRCs =
-      RC.removeInternalRefEdge(B, {&A});
+      RC.removeInternalRefEdges({{&B, &A}});
   EXPECT_EQ(0u, NewRCs.size());
   EXPECT_EQ(&RC, CG.lookupRefSCC(A));
   EXPECT_EQ(&RC, CG.lookupRefSCC(B));
@@ -1360,7 +1361,7 @@ TEST(LazyCallGraphTest, InternalEdgeRemoval) {
 
   // Remove the edge from c -> a, which should leave 'a' in the original RefSCC
   // and form a new RefSCC for 'b' and 'c'.
-  NewRCs = RC.removeInternalRefEdge(C, {&A});
+  NewRCs = RC.removeInternalRefEdges({{&C, &A}});
   ASSERT_EQ(2u, NewRCs.size());
   LazyCallGraph::RefSCC &BCRC = *NewRCs[0];
   LazyCallGraph::RefSCC &ARC = *NewRCs[1];
@@ -1425,7 +1426,7 @@ TEST(LazyCallGraphTest, InternalMultiEdgeRemoval) {
 
   // Remove the edges from b -> a and b -> c, leaving b in its own RefSCC.
   SmallVector<LazyCallGraph::RefSCC *, 1> NewRCs =
-      RC.removeInternalRefEdge(B, {&A, &C});
+      RC.removeInternalRefEdges({{&B, &A}, {&B, &C}});
   ASSERT_EQ(2u, NewRCs.size());
 
   LazyCallGraph::RefSCC &BRC = *NewRCs[0];
@@ -1494,7 +1495,7 @@ TEST(LazyCallGraphTest, InternalNoOpEdgeRemoval) {
 
   // Remove the edge from a -> c which doesn't change anything.
   SmallVector<LazyCallGraph::RefSCC *, 1> NewRCs =
-      RC.removeInternalRefEdge(AN, {&CN});
+      RC.removeInternalRefEdges({{&AN, &CN}});
   EXPECT_EQ(0u, NewRCs.size());
   EXPECT_EQ(&RC, CG.lookupRefSCC(AN));
   EXPECT_EQ(&RC, CG.lookupRefSCC(BN));
@@ -1509,8 +1510,8 @@ TEST(LazyCallGraphTest, InternalNoOpEdgeRemoval) {
 
   // Remove the edge from b -> a and c -> b; again this doesn't change
   // anything.
- NewRCs = RC.removeInternalRefEdge(BN, {&AN}); - NewRCs = RC.removeInternalRefEdge(CN, {&BN}); + NewRCs = RC.removeInternalRefEdges({{&BN, &AN}}); + NewRCs = RC.removeInternalRefEdges({{&CN, &BN}}); EXPECT_EQ(0u, NewRCs.size()); EXPECT_EQ(&RC, CG.lookupRefSCC(AN)); EXPECT_EQ(&RC, CG.lookupRefSCC(BN)); @@ -2163,7 +2164,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRef) { // Now delete 'dead'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(DeadN.getFunction()); + CG.markDeadFunction(DeadN.getFunction()); + CG.removeDeadFunctions({&DeadN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2212,7 +2214,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive) { // Now delete 'a'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2269,7 +2272,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive2) { // Now delete 'a'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2320,7 +2324,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive3) { // Now delete 'a'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. From 41f81ad73583bfff7c7dc1caeacbbb783d004e9c Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Tue, 11 Jun 2024 17:51:25 +0100 Subject: [PATCH 13/38] [Tablegen][NFC] Add a check for duplicate features (#94223) We hit this downstream and the only evidence of the mistake was that the results of `Find` on `SubtargetFeatureKV` were corrupted. 
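For illustration, the failure mode is the classic one for binary search over
an array that must be sorted and duplicate-free: once a key appears twice,
a lookup such as `Find` can land on either copy, so the entry returned is
effectively arbitrary. A minimal standalone sketch of the added check
(hypothetical keys, simplified from the TableGen code):

    #include <set>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Keys = {"neon", "sve", "neon"};
      std::set<std::string> UniqueKeys;
      for (const std::string &Key : Keys) {
        // insert().second is false when the key was already present,
        // which is the condition the new TableGen check reports.
        if (!UniqueKeys.insert(Key).second)
          return 1; // duplicate detected; TableGen raises a fatal error here
      }
      return 0;
    }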
---
 llvm/utils/TableGen/SubtargetEmitter.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 323470940fec5c..60a0402103ce05 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <set>
 #include
 #include
 
@@ -255,6 +256,9 @@ unsigned SubtargetEmitter::FeatureKeyValues(
 
   llvm::sort(FeatureList, LessRecordFieldName());
 
+  // Check that there are no duplicate keys
+  std::set<StringRef> UniqueKeys;
+
   // Begin feature table
   OS << "// Sorted (by key) array of values for CPU features.\n"
      << "extern const llvm::SubtargetFeatureKV " << Target
@@ -283,6 +287,10 @@ unsigned SubtargetEmitter::FeatureKeyValues(
     OS << " },\n";
     ++NumFeatures;
+
+    if (!UniqueKeys.insert(CommandLineName).second)
+      PrintFatalError("Duplicate key in SubtargetFeatureKV: " +
+                      CommandLineName);
   }
 
   // End feature table

From 27f3ac5929badc84a339248c9281997b87bb963e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?=
Date: Tue, 11 Jun 2024 10:10:42 -0700
Subject: [PATCH 14/38] [flang] Fix character scalar result for REDUCE
 intrinsic call (#95076)

The character REDUCE runtime functions expect a pointer to a scalar
character of the correct length for the result. Until now, a descriptor
was passed instead. Fix the lowering so that a proper temporary is
created and passed to the runtime.
---
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 20 ++++++----
 flang/test/Lower/Intrinsics/reduce.f90        | 38 +++++++++++++++----
 2 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 4317806561693c..c438ae1250e450 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5778,15 +5778,19 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
     return builder.create<fir::LoadOp>(loc, result);
   }
   if (fir::isa_char(eleTy)) {
-    // Create mutable fir.box to be passed to the runtime for the result.
-    fir::MutableBoxValue resultMutableBox =
-        fir::factory::createTempMutableBox(builder, loc, eleTy);
-    mlir::Value resultIrBox =
-        fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+    auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(resultType);
+    assert(charTy && "expect CharacterType");
+    fir::factory::CharacterExprHelper charHelper(builder, loc);
+    mlir::Value len;
+    if (charTy.hasDynamicLen())
+      len = charHelper.readLengthFromBox(fir::getBase(arrayTmp), charTy);
+    else
+      len = builder.createIntegerConstant(loc, builder.getI32Type(),
+                                          charTy.getLen());
+    fir::CharBoxValue temp = charHelper.createCharacterTemp(eleTy, len);
     fir::runtime::genReduce(builder, loc, array, operation, mask, identity,
-                            ordered, resultIrBox);
-    // Handle cleanup of allocatable result descriptor and return
-    return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
+                            ordered, temp.getBuffer());
+    return temp;
   }
   return fir::runtime::genReduce(builder, loc, array, operation, mask,
                                  identity, ordered);
diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90
index 842e626d7cc397..8d7b7798a94c56 100644
--- a/flang/test/Lower/Intrinsics/reduce.f90
+++ b/flang/test/Lower/Intrinsics/reduce.f90
@@ -348,21 +348,25 @@ subroutine char1(a)
 
   res = reduce(a, red_char1)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceChar1
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<1> {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8>
+! CHECK: fir.call @_FortranAReduceChar1(%[[RESULT]], {{.*}})
 
 pure function red_char2(a,b)
-  character(kind=2), intent(in) :: a, b
-  character(kind=2) :: red_char2
+  character(kind=2, len=10), intent(in) :: a, b
+  character(kind=2, len=10) :: red_char2
   red_char2 = a // b
 end function
 
 subroutine char2(a)
-  character(kind=2), intent(in) :: a(:)
-  character(kind=2) :: res
+  character(kind=2, len=10), intent(in) :: a(:)
+  character(kind=2, len=10) :: res
 
   res = reduce(a, red_char2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceChar2
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<2,10> {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<2,10>>) -> !fir.ref<i16>
+! CHECK: fir.call @_FortranAReduceChar2(%[[RESULT]], {{.*}})
 
 pure function red_char4(a,b)
   character(kind=4), intent(in) :: a, b
@@ -598,8 +602,8 @@ subroutine char1dim(a)
 
 ! CHECK: fir.call @_FortranAReduceCharacter1Dim
 
 subroutine char2dim(a)
-  character(kind=2), intent(in) :: a(:, :)
-  character(kind=2), allocatable :: res(:)
+  character(kind=2, len=10), intent(in) :: a(:, :)
+  character(kind=2, len=10), allocatable :: res(:)
 
   res = reduce(a, red_char2, 2)
 end subroutine
@@ -613,4 +617,22 @@ subroutine char4dim(a)
 
 ! CHECK: fir.call @_FortranAReduceCharacter4Dim
 
+pure function red_char_dyn(a, b)
+  character(*), intent(In) :: a, b
+  character(max(len(a),len(b))) :: red_char_dyn
+  red_char_dyn = max(a, b)
+end function
+
+subroutine charDyn()
+  character(5) :: res
+  character(:), allocatable :: a(:)
+  allocate(character(10)::a(10))
+  res = reduce(a, red_char_dyn)
+end subroutine
+
+! CHECK: %[[BOX_ELESIZE:.*]] = fir.box_elesize %{{.*}} : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>) -> index
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<1,?>(%[[BOX_ELESIZE]] : index) {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: fir.call @_FortranAReduceChar1(%[[RESULT]], {{.*}})
+
 end module

From 1c59362e4456fb0861fcf4be9831b4db3e921b2b Mon Sep 17 00:00:00 2001
From: geza-herman
Date: Tue, 11 Jun 2024 19:32:12 +0200
Subject: [PATCH 15/38] [clangd] Make it possible to enable
 misc-const-correctness clang-tidy check (#94920)

Before this PR, clangd forcefully disabled misc-const-correctness in
disableUnusableChecks(). Now we have a FastCheckFilter configuration
whose default value (Strict) also disables it. This patch removes
misc-const-correctness from disableUnusableChecks() so it is possible to
enable it by setting FastCheckFilter to None.

Fixes https://github.com/llvm/llvm-project/issues/89758
---
 clang-tools-extra/clangd/TidyProvider.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp
index b658a80559937c..a4121df30d3dfa 100644
--- a/clang-tools-extra/clangd/TidyProvider.cpp
+++ b/clang-tools-extra/clangd/TidyProvider.cpp
@@ -221,13 +221,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef<std::string> ExtraBadChecks) {
       "-hicpp-invalid-access-moved",
 
       // Check uses dataflow analysis, which might hang/crash unexpectedly on
       // incomplete code.
-      "-bugprone-unchecked-optional-access",
-
-      // ----- Performance problems -----
-
-      // This check runs expensive analysis for each variable.
-      // It has been observed to increase reparse time by 10x.
-      "-misc-const-correctness");
+      "-bugprone-unchecked-optional-access");
 
   size_t Size = BadChecks.size();
   for (const std::string &Str : ExtraBadChecks) {

From 727ecbeee3638d3736cd6345a1f102f8d2c6ce0d Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <49014204+sayhaan@users.noreply.github.com>
Date: Tue, 11 Jun 2024 10:36:49 -0700
Subject: [PATCH 16/38] [BOLT][DWARF][NFC] Remove old GDB Index functions
 (#95019)

Remove old usages of GDB Index functions after replacing them with new
ones.
---
 bolt/include/bolt/Rewrite/DWARFRewriter.h |  11 --
 bolt/lib/Rewrite/DWARFRewriter.cpp        | 171 ----------------------
 2 files changed, 182 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index 3cc9d823c815b2..4559ff5ff51592 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -150,9 +150,6 @@ class DWARFRewriter {
   /// blocks) to be updated.
   void updateDebugAddressRanges();
 
-  /// Rewrite .gdb_index section if present.
-  void updateGdbIndexSection(CUOffsetMap &CUMap, uint32_t NumCUs);
-
   /// DWARFDie contains a pointer to a DIE and hence gets invalidated once the
   /// embedded DIE is destroyed. This wrapper class stores a DIE internally and
   /// could be cast to a DWARFDie that is valid even after the initial DIE is
@@ -194,14 +191,6 @@ class DWARFRewriter {
     DwoRangesBase[DWOId] = RangesBase;
   }
 
-  /// Adds an GDBIndexTUEntry if .gdb_index seciton exists.
-  void addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry);
-
-  /// Returns all entries needed for Types CU list
-  const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const {
-    return GDBIndexTUEntryVector;
-  }
-
   using OverriddenSectionsMap = std::unordered_map<DWARFSectionKind, StringRef>;
   /// Output .dwo files.
   void writeDWOFiles(DWARFUnit &, const OverriddenSectionsMap &,

diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index e1b3762a316606..0e475031eae4f3 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -2060,177 +2060,6 @@ void DWARFRewriter::writeDWOFiles(
   TempOut->keep();
 }
 
-void DWARFRewriter::addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry) {
-  std::lock_guard<std::mutex> Lock(DWARFRewriterMutex);
-  if (!BC.getGdbIndexSection())
-    return;
-  GDBIndexTUEntryVector.emplace_back(Entry);
-}
-
-void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap, uint32_t NumCUs) {
-  if (!BC.getGdbIndexSection())
-    return;
-
-  // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
-  // for .gdb_index section format.
-
-  StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
-
-  const char *Data = GdbIndexContents.data();
-
-  // Parse the header.
-  const uint32_t Version = read32le(Data);
-  if (Version != 7 && Version != 8) {
-    errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
-    exit(1);
-  }
-
-  // Some .gdb_index generators use file offsets while others use section
-  // offsets. Hence we can only rely on offsets relative to each other,
-  // and ignore their absolute values.
-  const uint32_t CUListOffset = read32le(Data + 4);
-  const uint32_t CUTypesOffset = read32le(Data + 8);
-  const uint32_t AddressTableOffset = read32le(Data + 12);
-  const uint32_t SymbolTableOffset = read32le(Data + 16);
-  const uint32_t ConstantPoolOffset = read32le(Data + 20);
-  Data += 24;
-
-  // Map CUs offsets to indices and verify existing index table.
-  std::map<uint64_t, uint32_t> OffsetToIndexMap;
-  const uint32_t CUListSize = CUTypesOffset - CUListOffset;
-  const uint32_t TUListSize = AddressTableOffset - CUTypesOffset;
-  const unsigned NUmCUsEncoded = CUListSize / 16;
-  unsigned MaxDWARFVersion = BC.DwCtx->getMaxVersion();
-  unsigned NumDWARF5TUs =
-      getGDBIndexTUEntryVector().size() - BC.DwCtx->getNumTypeUnits();
-  bool SkipTypeUnits = false;
-  // For DWARF5 Types are in .debug_info.
-  // LLD doesn't generate Types CU List, and in CU list offset
-  // only includes CUs.
-  // GDB 11+ includes only CUs in CU list and generates Types
-  // list.
-  // GDB 9 includes CUs and TUs in CU list and generates TYpes
-  // list. The NumCUs is CUs + TUs, so need to modify the check.
-  // For split-dwarf
-  // GDB-11, DWARF5: TU units from dwo are not included.
-  // GDB-11, DWARF4: TU units from dwo are included.
-  if (MaxDWARFVersion >= 5)
-    SkipTypeUnits = !TUListSize ? true
-                                : ((NUmCUsEncoded + NumDWARF5TUs) ==
-                                   BC.DwCtx->getNumCompileUnits());
-
-  if (!((CUListSize == NumCUs * 16) ||
-        (CUListSize == (NumCUs + NumDWARF5TUs) * 16))) {
-    errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
-    exit(1);
-  }
-  DenseSet<uint64_t> OriginalOffsets;
-  for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
-       Index < Units; ++Index) {
-    const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
-    if (SkipTypeUnits && CU->isTypeUnit())
-      continue;
-    const uint64_t Offset = read64le(Data);
-    Data += 16;
-    if (CU->getOffset() != Offset) {
-      errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
-      exit(1);
-    }
-
-    OriginalOffsets.insert(Offset);
-    OffsetToIndexMap[Offset] = Index;
-  }
-
-  // Ignore old address table.
-  const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
-  // Move Data to the beginning of symbol table.
-  Data += SymbolTableOffset - CUTypesOffset;
-
-  // Calculate the size of the new address table.
-  uint32_t NewAddressTableSize = 0;
-  for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
-    const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
-    NewAddressTableSize += Ranges.size() * 20;
-  }
-
-  // Difference between old and new table (and section) sizes.
-  // Could be negative.
-  int32_t Delta = NewAddressTableSize - OldAddressTableSize;
-
-  size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
-
-  // Free'd by ExecutableFileMemoryManager.
-  auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
-  uint8_t *Buffer = NewGdbIndexContents;
-
-  write32le(Buffer, Version);
-  write32le(Buffer + 4, CUListOffset);
-  write32le(Buffer + 8, CUTypesOffset);
-  write32le(Buffer + 12, AddressTableOffset);
-  write32le(Buffer + 16, SymbolTableOffset + Delta);
-  write32le(Buffer + 20, ConstantPoolOffset + Delta);
-  Buffer += 24;
-
-  using MapEntry = std::pair<uint64_t, CUInfo>;
-  std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
-  // Need to sort since we write out all of TUs in .debug_info before CUs.
-  std::sort(CUVector.begin(), CUVector.end(),
-            [](const MapEntry &E1, const MapEntry &E2) -> bool {
-              return E1.second.Offset < E2.second.Offset;
-            });
-  // Writing out CU List
-  for (auto &CUInfo : CUVector) {
-    // Skipping TU for DWARF5 when they are not included in CU list.
-    if (!OriginalOffsets.count(CUInfo.first))
-      continue;
-    write64le(Buffer, CUInfo.second.Offset);
-    // Length encoded in CU doesn't contain first 4 bytes that encode length.
-    write64le(Buffer + 8, CUInfo.second.Length + 4);
-    Buffer += 16;
-  }
-
-  // Rewrite TU CU List, since abbrevs can be different.
-  // Entry example:
-  // 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
-  // 0x418503b8111e9a7b Spec says " triplet, the first value is the CU offset,
-  // the second value is the type offset in the CU, and the third value is the
-  // type signature" Looking at what is being generated by gdb-add-index. The
-  // first entry is TU offset, second entry is offset from it, and third entry
-  // is the type signature.
-  if (TUListSize)
-    for (const GDBIndexTUEntry &Entry : getGDBIndexTUEntryVector()) {
-      write64le(Buffer, Entry.UnitOffset);
-      write64le(Buffer + 8, Entry.TypeDIERelativeOffset);
-      write64le(Buffer + 16, Entry.TypeHash);
-      Buffer += sizeof(GDBIndexTUEntry);
-    }
-
-  // Generate new address table.
-  for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
-       ARangesSectionWriter->getCUAddressRanges()) {
-    const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
-    const DebugAddressRangesVector &Ranges = CURangesPair.second;
-    for (const DebugAddressRange &Range : Ranges) {
-      write64le(Buffer, Range.LowPC);
-      write64le(Buffer + 8, Range.HighPC);
-      write32le(Buffer + 16, CUIndex);
-      Buffer += 20;
-    }
-  }
-
-  const size_t TrailingSize =
-      GdbIndexContents.data() + GdbIndexContents.size() - Data;
-  assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
-         "size calculation error");
-
-  // Copy over the rest of the original data.
-  memcpy(Buffer, Data, TrailingSize);
-
-  // Register the new section.
-  BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
-                                 NewGdbIndexSize);
-}
-
 std::unique_ptr<DebugBufferVector>
 DWARFRewriter::makeFinalLocListsSection(DWARFVersion Version) {
   auto LocBuffer = std::make_unique<DebugBufferVector>();

From 56f668c12b1a26e103aafe5ac37930b1895c938b Mon Sep 17 00:00:00 2001
From: Pavel Labath
Date: Tue, 11 Jun 2024 19:49:10 +0200
Subject: [PATCH 17/38] [lldb/DWARF] Remove some dead code (#95127)

`GetDeclContextDIEs` and `DIEDeclContextsMatch` are unused (possibly
since we added support for simplified template names, but I haven't
checked).

`GetDeclContextDIEs` is also very similar to (but subtly different
from) `GetDeclContext` and `GetTypeLookupContext`. I am keeping
`GetParentDeclContextDIE` as that one still has some callers, but I want
to look into the possibility of merging it with at least one of the
functions mentioned above.
---
 .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp     | 14 ---
 .../Plugins/SymbolFile/DWARF/DWARFDIE.h       |  3 -
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      | 89 -------------------
 .../SymbolFile/DWARF/SymbolFileDWARF.h        |  2 -
 4 files changed, 108 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
index 7cf92adc6ef578..0ef94ed9f17c34 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
@@ -367,20 +367,6 @@ lldb_private::Type *DWARFDIE::ResolveTypeUID(const DWARFDIE &die) const {
   return nullptr;
 }
 
-std::vector<DWARFDIE> DWARFDIE::GetDeclContextDIEs() const {
-  if (!IsValid())
-    return {};
-
-  std::vector<DWARFDIE> result;
-  DWARFDIE parent = GetParentDeclContextDIE();
-  while (parent.IsValid() && parent.GetDIE() != GetDIE()) {
-    result.push_back(std::move(parent));
-    parent = parent.GetParentDeclContextDIE();
-  }
-
-  return result;
-}
-
 static void GetDeclContextImpl(DWARFDIE die,
                                llvm::SmallSet<lldb::user_id_t, 4> &seen,
                                std::vector<CompilerContext> &context) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
index 511ca62d0197a8..c74a82061fccf2 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
@@ -69,9 +69,6 @@ class DWARFDIE : public DWARFBaseDIE {
 
   DWARFDIE GetParentDeclContextDIE() const;
 
-  // DeclContext related functions
-  std::vector<DWARFDIE> GetDeclContextDIEs() const;
-
   /// Return this DIE's decl context as it is needed to look up types
   /// in Clang modules. This context will include any modules or functions that
   /// the type is declared in so an exact module match can be efficiently made.
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index a52a7d67673742..d9e81f9c105b2d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3039,95 +3039,6 @@ TypeSP SymbolFileDWARF::FindCompleteObjCDefinitionTypeForDIE(
   return type_sp;
 }
 
-// This function helps to ensure that the declaration contexts match for two
-// different DIEs. Often times debug information will refer to a forward
-// declaration of a type (the equivalent of "struct my_struct;". There will
-// often be a declaration of that type elsewhere that has the full definition.
-// When we go looking for the full type "my_struct", we will find one or more
-// matches in the accelerator tables and we will then need to make sure the
-// type was in the same declaration context as the original DIE. This function
-// can efficiently compare two DIEs and will return true when the declaration
-// context matches, and false when they don't.
-bool SymbolFileDWARF::DIEDeclContextsMatch(const DWARFDIE &die1,
-                                           const DWARFDIE &die2) {
-  if (die1 == die2)
-    return true;
-
-  std::vector<DWARFDIE> decl_ctx_1;
-  std::vector<DWARFDIE> decl_ctx_2;
-  // The declaration DIE stack is a stack of the declaration context DIEs all
-  // the way back to the compile unit. If a type "T" is declared inside a class
-  // "B", and class "B" is declared inside a class "A" and class "A" is in a
-  // namespace "lldb", and the namespace is in a compile unit, there will be a
-  // stack of DIEs:
-  //
-  //   [0] DW_TAG_class_type for "B"
-  //   [1] DW_TAG_class_type for "A"
-  //   [2] DW_TAG_namespace for "lldb"
-  //   [3] DW_TAG_compile_unit or DW_TAG_partial_unit for the source file.
- // - // We grab both contexts and make sure that everything matches all the way - // back to the compiler unit. - - // First lets grab the decl contexts for both DIEs - decl_ctx_1 = die1.GetDeclContextDIEs(); - decl_ctx_2 = die2.GetDeclContextDIEs(); - // Make sure the context arrays have the same size, otherwise we are done - const size_t count1 = decl_ctx_1.size(); - const size_t count2 = decl_ctx_2.size(); - if (count1 != count2) - return false; - - // Make sure the DW_TAG values match all the way back up the compile unit. If - // they don't, then we are done. - DWARFDIE decl_ctx_die1; - DWARFDIE decl_ctx_die2; - size_t i; - for (i = 0; i < count1; i++) { - decl_ctx_die1 = decl_ctx_1[i]; - decl_ctx_die2 = decl_ctx_2[i]; - if (decl_ctx_die1.Tag() != decl_ctx_die2.Tag()) - return false; - } -#ifndef NDEBUG - - // Make sure the top item in the decl context die array is always - // DW_TAG_compile_unit or DW_TAG_partial_unit. If it isn't then - // something went wrong in the DWARFDIE::GetDeclContextDIEs() - // function. - dw_tag_t cu_tag = decl_ctx_1[count1 - 1].Tag(); - UNUSED_IF_ASSERT_DISABLED(cu_tag); - assert(cu_tag == DW_TAG_compile_unit || cu_tag == DW_TAG_partial_unit); - -#endif - // Always skip the compile unit when comparing by only iterating up to "count - // - 1". Here we compare the names as we go. - for (i = 0; i < count1 - 1; i++) { - decl_ctx_die1 = decl_ctx_1[i]; - decl_ctx_die2 = decl_ctx_2[i]; - const char *name1 = decl_ctx_die1.GetName(); - const char *name2 = decl_ctx_die2.GetName(); - // If the string was from a DW_FORM_strp, then the pointer will often be - // the same! - if (name1 == name2) - continue; - - // Name pointers are not equal, so only compare the strings if both are not - // NULL. - if (name1 && name2) { - // If the strings don't compare, we are done... - if (strcmp(name1, name2) != 0) - return false; - } else { - // One name was NULL while the other wasn't - return false; - } - } - // We made it through all of the checks and the declaration contexts are - // equal. - return true; -} - TypeSP SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) { TypeSP type_sp; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 7282c08c6857c9..5d3654efcce544 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -461,8 +461,6 @@ class SymbolFileDWARF : public SymbolFileCommon { FindBlockContainingSpecification(const DWARFDIE &die, dw_offset_t spec_block_die_offset); - bool DIEDeclContextsMatch(const DWARFDIE &die1, const DWARFDIE &die2); - bool ClassContainsSelector(const DWARFDIE &class_die, ConstString selector); /// Parse call site entries (DW_TAG_call_site), including any nested call site From 540893e43fb7a5e08deec0d951fd3c1d957f8f89 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Tue, 11 Jun 2024 10:52:51 -0700 Subject: [PATCH 18/38] [BOLT] Add auto parsing for Linux kernel .altinstructions (#95068) .altinstructions section contains a list of structures where fields can have different sizes while other fields could be present or not depending on the kernel version. Add automatic detection of such variations and use it by default. The user can still overwrite the automatic detection with `--alt-inst-has-padlen` and `--alt-inst-feature-size` options. 
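To make the probing strategy concrete, here is a minimal standalone sketch
of the same idea (simplified: the real parser also validates the decoded
entry contents, not just the section size, and the names here are
illustrative rather than taken from the BOLT sources):

    #include <cstdint>
    #include <vector>

    struct Layout {
      uint32_t FeatureSize; // "feature" field: 2 or 4 bytes
      bool HasPadLen;       // extra u8 padlen present in older kernels
      // s32 instr_offset + s32 repl_offset + feature + u8 + u8 (+ u8 padlen)
      uint64_t entrySize() const {
        return 4 + 4 + FeatureSize + 1 + 1 + (HasPadLen ? 1 : 0);
      }
    };

    // Try all four layout combinations; keep those consistent with the
    // observed section size.
    std::vector<Layout> candidateLayouts(uint64_t SectionSize) {
      std::vector<Layout> Matches;
      for (bool Pad : {false, true})
        for (uint32_t FS : {2u, 4u})
          if (SectionSize % Layout{FS, Pad}.entrySize() == 0)
            Matches.push_back({FS, Pad});
      return Matches;
    }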
---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 72 ++++++++++++++++++++++--
 bolt/test/X86/linux-alt-instruction.s    | 20 ++++---
 2 files changed, 80 insertions(+), 12 deletions(-)

diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index b2c8b2446f7e1e..6b3f5bce9f0f58 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -273,6 +273,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {
 
   /// Handle alternative instruction info from .altinstructions.
   Error readAltInstructions();
+  Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
+                               bool AltInstHasPadLen, bool ParseOnly);
   Error rewriteAltInstructions();
 
   /// Read .pci_fixup
@@ -1319,12 +1321,69 @@ Error LinuxKernelRewriter::rewriteBugTable() {
 ///   u8 padlen;        // present in older kernels
 /// } __packed;
 ///
-/// Note the structures is packed.
+/// Note that the structure is packed.
+///
+/// Since the size of the "feature" field could be either u16 or u32, and
+/// "padlen" presence is unknown, we attempt to parse .altinstructions section
+/// using all possible combinations (four at this time). Since we validate the
+/// contents of the section and its size, the detection works quite well.
+/// Still, we leave the user the opportunity to specify these features on the
+/// command line and skip the guesswork.
 Error LinuxKernelRewriter::readAltInstructions() {
   AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
   if (!AltInstrSection)
     return Error::success();
 
+  // Presence of "padlen" field.
+  std::vector<bool> PadLenVariants;
+  if (opts::AltInstHasPadLen.getNumOccurrences())
+    PadLenVariants.push_back(opts::AltInstHasPadLen);
+  else
+    PadLenVariants = {false, true};
+
+  // Size (in bytes) variants of "feature" field.
+  std::vector<uint32_t> FeatureSizeVariants;
+  if (opts::AltInstFeatureSize.getNumOccurrences())
+    FeatureSizeVariants.push_back(opts::AltInstFeatureSize);
+  else
+    FeatureSizeVariants = {2, 4};
+
+  for (bool AltInstHasPadLen : PadLenVariants) {
+    for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
+      LLVM_DEBUG({
+        dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
+               << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
+      });
+      if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
+                                           /*ParseOnly*/ true)) {
+        consumeError(std::move(E));
+        continue;
+      }
+
+      LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
+
+      if (!opts::AltInstHasPadLen.getNumOccurrences())
+        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
+                  << '=' << AltInstHasPadLen << '\n';
+
+      if (!opts::AltInstFeatureSize.getNumOccurrences())
+        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
+                  << '=' << AltInstFeatureSize << '\n';
+
+      return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
+                                    /*ParseOnly*/ false);
+    }
+  }
+
+  // We couldn't match the format. Read again to properly propagate the error
+  // to the user.
+ return tryReadAltInstructions(opts::AltInstFeatureSize, + opts::AltInstHasPadLen, /*ParseOnly*/ false); +} + +Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize, + bool AltInstHasPadLen, + bool ParseOnly) { const uint64_t Address = AltInstrSection->getAddress(); DataExtractor DE = DataExtractor(AltInstrSection->getContents(), BC.AsmInfo->isLittleEndian(), @@ -1336,12 +1395,12 @@ Error LinuxKernelRewriter::readAltInstructions() { Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); const uint64_t AltInstAddress = Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); + const uint64_t Feature = DE.getUnsigned(Cursor, AltInstFeatureSize); const uint8_t OrgSize = DE.getU8(Cursor); const uint8_t AltSize = DE.getU8(Cursor); // Older kernels may have the padlen field. - const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; + const uint8_t PadLen = AltInstHasPadLen ? DE.getU8(Cursor) : 0; if (!Cursor) return createStringError( @@ -1358,7 +1417,7 @@ Error LinuxKernelRewriter::readAltInstructions() { << "\n\tFeature: 0x" << Twine::utohexstr(Feature) << "\n\tOrgSize: " << (int)OrgSize << "\n\tAltSize: " << (int)AltSize << '\n'; - if (opts::AltInstHasPadLen) + if (AltInstHasPadLen) BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; } @@ -1375,7 +1434,7 @@ Error LinuxKernelRewriter::readAltInstructions() { BinaryFunction *AltBF = BC.getBinaryFunctionContainingAddress(AltInstAddress); - if (AltBF && BC.shouldEmit(*AltBF)) { + if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) { BC.errs() << "BOLT-WARNING: alternative instruction sequence found in function " << *AltBF << '\n'; @@ -1397,6 +1456,9 @@ Error LinuxKernelRewriter::readAltInstructions() { " referenced by .altinstructions entry %d", OrgInstAddress, EntryID); + if (ParseOnly) + continue; + // There could be more than one alternative instruction sequences for the // same original instruction. Annotate each alternative separately. std::string AnnotationName = "AltInst"; diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s index 2cdf31519682a8..66cd33a711b89b 100644 --- a/bolt/test/X86/linux-alt-instruction.s +++ b/bolt/test/X86/linux-alt-instruction.s @@ -12,24 +12,30 @@ ## Older kernels used to have padlen field in alt_instr. Check compatibility. # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \ -# RUN: %s -o %t.o -# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: %s -o %t.padlen.o +# RUN: %clang %cflags -nostdlib %t.padlen.o -o %t.padlen.exe \ # RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie -# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-has-padlen -o %t.out \ +# RUN: llvm-bolt %t.padlen.exe --print-normalized --alt-inst-has-padlen -o %t.padlen.out \ # RUN: | FileCheck %s ## Check with a larger size of "feature" field in alt_instr. 
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ -# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o -# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.fs4.o +# RUN: %clang %cflags -nostdlib %t.fs4.o -o %t.fs4.exe \ # RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie -# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=4 -o %t.out \ +# RUN: llvm-bolt %t.fs4.exe --print-normalized --alt-inst-feature-size=4 -o %t.fs4.out \ # RUN: | FileCheck %s ## Check that out-of-bounds read is handled properly. -# RUN: not llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=2 -o %t.out +# RUN: not llvm-bolt %t.fs4.exe --alt-inst-feature-size=2 -o %t.fs4.out + +## Check that BOLT automatically detects structure fields in .altinstructions. + +# RUN: llvm-bolt %t.exe --print-normalized -o %t.out | FileCheck %s +# RUN: llvm-bolt %t.exe --print-normalized -o %t.padlen.out | FileCheck %s +# RUN: llvm-bolt %t.exe --print-normalized -o %t.fs4.out | FileCheck %s # CHECK: BOLT-INFO: Linux kernel binary detected # CHECK: BOLT-INFO: parsed 2 alternative instruction entries From 9b4f8acf9dd1cd517f923c6de8274eed80879f6c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 11 Jun 2024 14:00:24 -0400 Subject: [PATCH 19/38] [clang] Fix a few comment typos to cycle bots --- clang/lib/Sema/SemaDecl.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4b9b735f1cfb43..95a6fe66babae9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1493,7 +1493,7 @@ void Sema::ActOnExitFunctionContext() { /// /// This routine determines whether overloading is possible, not /// whether a new declaration actually overloads a previous one. -/// It will return true in C++ (where overloads are alway permitted) +/// It will return true in C++ (where overloads are always permitted) /// or, as a C extension, when either the new declaration or a /// previous one is declared with the 'overloadable' attribute. static bool AllowOverloadingOfFunction(const LookupResult &Previous, @@ -4147,7 +4147,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S, // If we are merging two functions where only one of them has a prototype, // we may have enough information to decide to issue a diagnostic that the - // function without a protoype will change behavior in C23. This handles + // function without a prototype will change behavior in C23. This handles // cases like: // void i(); void i(int j); // void i(int j); void i(); @@ -10553,7 +10553,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (getLangOpts().CUDA && !isFunctionTemplateSpecialization) CUDA().maybeAddHostDeviceAttrs(NewFD, Previous); - // Handle explict specializations of function templates + // Handle explicit specializations of function templates // and friend function declarations with an explicit // template argument list. 
if (isFunctionTemplateSpecialization) { @@ -12601,7 +12601,7 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) { if (FD->getName() != "DllMain") FD->setHasImplicitReturnZero(true); - // Explicity specified calling conventions are applied to MSVC entry points + // Explicitly specified calling conventions are applied to MSVC entry points if (!hasExplicitCallingConv(T)) { if (isDefaultStdCall(FD, *this)) { if (FT->getCallConv() != CC_X86StdCall) { @@ -13674,12 +13674,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { CreateRecoveryExpr(Init->getBeginLoc(), Init->getEndLoc(), Args); if (RecoveryExpr.get()) VDecl->setInit(RecoveryExpr.get()); - // In general, for error recovery purposes, the initalizer doesn't play + // In general, for error recovery purposes, the initializer doesn't play // part in the valid bit of the declaration. There are a few exceptions: // 1) if the var decl has a deduced auto type, and the type cannot be // deduced by an invalid initializer; - // 2) if the var decl is decompsition decl with a non-deduced type, and - // the initialization fails (e.g. `int [a] = {1, 2};`); + // 2) if the var decl is a decomposition decl with a non-deduced type, + // and the initialization fails (e.g. `int [a] = {1, 2};`); // Case 1) was already handled elsewhere. if (isa(VDecl)) // Case 2) VDecl->setInvalidDecl(); @@ -13897,9 +13897,9 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { } } else if (VDecl->isFileVarDecl()) { // In C, extern is typically used to avoid tentative definitions when - // declaring variables in headers, but adding an intializer makes it a + // declaring variables in headers, but adding an initializer makes it a // definition. This is somewhat confusing, so GCC and Clang both warn on it. - // In C++, extern is often used to give implictly static const variables + // In C++, extern is often used to give implicitly static const variables // external linkage, so don't warn in that case. If selectany is present, // this might be header code intended for C and C++ inclusion, so apply the // C++ rules. @@ -14093,7 +14093,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { return; } } - // The declaration is unitialized, no need for further checks. + // The declaration is uninitialized, no need for further checks. return; } @@ -16324,7 +16324,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, FSI->ObjCWarnForNoDesignatedInitChain = false; } if (FSI->ObjCWarnForNoInitDelegation) { - // Don't issue this warning for unavaialable inits. + // Don't issue this warning for unavailable inits. if (!MD->isUnavailable()) Diag(MD->getLocation(), diag::warn_objc_secondary_init_missing_init_call); @@ -17876,7 +17876,7 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, SkipBody->Previous = Def; makeMergedDefinitionVisible(Hidden); // Carry on and handle it like a normal definition. We'll - // skip starting the definitiion later. + // skip starting the definition later. } } else if (!IsExplicitSpecializationAfterInstantiation) { // A redeclaration in function prototype scope in C isn't @@ -20475,7 +20475,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, } else if (LangOpts.OpenMP > 45) { // In OpenMP host compilation prior to 5.0 everything was an emitted host // function. In 5.0, no_host was introduced which might cause a function to - // be ommitted. + // be omitted. 
    std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
    if (DevTy)

From ca6386073308d3c41647d8fc3e2cf72a77d46c76 Mon Sep 17 00:00:00 2001
From: Michal Paszkowski
Date: Tue, 11 Jun 2024 11:09:21 -0700
Subject: [PATCH 20/38] [SPIR-V] Don't change switch condition type in CodeGen
 opts (#94959)

This change makes sure the preferred switch condition int type size
remains the same throughout CodeGen optimizations.

The change fixes running several OpenCL applications with -O2 or higher
opt levels, and fixes the Basic/stream/stream_max_stmt_exceed.cpp DPC++
E2E test with -O2.
---
 llvm/lib/Target/SPIRV/SPIRVISelLowering.h     |  5 +++++
 .../optimizations/switch-condition-type.ll    | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
index 6fc200abf46279..77356b7512a739 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -68,6 +68,11 @@ class SPIRVTargetLowering : public TargetLowering {
   // extra instructions required to preserve validity of SPIR-V code imposed by
   // the standard.
   void finalizeLowering(MachineFunction &MF) const override;
+
+  MVT getPreferredSwitchConditionType(LLVMContext &Context,
+                                      EVT ConditionVT) const override {
+    return ConditionVT.getSimpleVT();
+  }
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll
new file mode 100644
index 00000000000000..054520d2021b99
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O2 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O2 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: %[[#INT16:]] = OpTypeInt 16 0
+; CHECK: %[[#PARAM:]] = OpFunctionParameter %[[#INT16]]
+; CHECK: OpSwitch %[[#PARAM]] %[[#]] 1 %[[#]] 2 %[[#]]
+
+define i32 @test_switch(i16 %cond) {
+entry:
+  switch i16 %cond, label %default [ i16 1, label %case_one
+                                     i16 2, label %case_two ]
+case_one:
+  ret i32 1
+case_two:
+  ret i32 2
+default:
+  ret i32 3
+}

From a13bc9714a6bfb766693aa7900217f6f9be6f25d Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis
Date: Tue, 11 Jun 2024 19:21:11 +0100
Subject: [PATCH 21/38] [BOLT][AArch64] Implement PLTCall optimization (#93584)

`convertCallToIndirectCall` applies the PLTCall optimization and returns
an (updated if needed) iterator to the converted call instruction. Since
AArch64 requires injecting additional instructions to implement this
pass, the relevant BasicBlock and an iterator are passed to
`convertCallToIndirectCall`.

`NumCallsOptimized` is updated only on successful application of the
pass.

Tests:
- Inputs/plt-tailcall.c: an example of a tail-call-optimized PLT call.
- AArch64/plt-call.test: the actual AArch64 test; it runs the PLTCall
  optimization on the above input file and verifies that the pass is
  applied to the calls 'printf' and 'puts'.
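Concretely, the optimization replaces a direct call through a PLT stub with
a load of the callee's GOT entry followed by an indirect call. On AArch64
the before/after shape, mirroring the CHECK lines of the new test, is
(sketch in comments):

    // before: direct call routed through the PLT
    //   bl    printf
    //
    // after: load the GOT entry and call through a register
    //   adrp  x16, printf@GOT
    //   ldr   x17, [x16, :lo12:printf@GOT]
    //   blr   x17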
--- bolt/include/bolt/Core/MCPlusBuilder.h | 13 +++--- bolt/lib/Passes/PLTCall.cpp | 19 +++++---- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 41 +++++++++++++++++++ bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 16 +++++--- bolt/test/AArch64/plt-call.test | 15 +++++++ bolt/test/Inputs/plt-tailcall.c | 8 ++++ bolt/test/X86/plt-call.test | 11 +++++ 7 files changed, 104 insertions(+), 19 deletions(-) create mode 100644 bolt/test/AArch64/plt-call.test create mode 100644 bolt/test/Inputs/plt-tailcall.c create mode 100644 bolt/test/X86/plt-call.test diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index a5fb3901428d9d..765372aa9e402f 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1412,13 +1412,14 @@ class MCPlusBuilder { return false; } - /// Modify a direct call instruction \p Inst with an indirect call taking - /// a destination from a memory location pointed by \p TargetLocation symbol. - virtual bool convertCallToIndirectCall(MCInst &Inst, - const MCSymbol *TargetLocation, - MCContext *Ctx) { + /// Creates an indirect call to the function within the \p DirectCall PLT + /// stub. The function's memory location is pointed by the \p TargetLocation + /// symbol. + virtual InstructionListType + createIndirectPltCall(const MCInst &DirectCall, + const MCSymbol *TargetLocation, MCContext *Ctx) { llvm_unreachable("not implemented"); - return false; + return {}; } /// Morph an indirect call into a load where \p Reg holds the call target. diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp index d0276f22e14ef8..2ed996fadbb99e 100644 --- a/bolt/lib/Passes/PLTCall.cpp +++ b/bolt/lib/Passes/PLTCall.cpp @@ -48,8 +48,8 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) { return Error::success(); uint64_t NumCallsOptimized = 0; - for (auto &It : BC.getBinaryFunctions()) { - BinaryFunction &Function = It.second; + for (auto &BFI : BC.getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; if (!shouldOptimize(Function)) continue; @@ -61,18 +61,21 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) { if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount()) continue; - for (MCInst &Instr : BB) { - if (!BC.MIB->isCall(Instr)) + for (auto II = BB.begin(); II != BB.end(); II++) { + if (!BC.MIB->isCall(*II)) continue; - const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); + const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*II); if (!CallSymbol) continue; const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol); if (!CalleeBF || !CalleeBF->isPLTFunction()) continue; - BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(), - BC.Ctx.get()); - BC.MIB->addAnnotation(Instr, "PLTCall", true); + const InstructionListType NewCode = BC.MIB->createIndirectPltCall( + *II, CalleeBF->getPLTSymbol(), BC.Ctx.get()); + II = BB.replaceInstruction(II, NewCode); + assert(!NewCode.empty() && "PLT Call replacement must be non-empty"); + std::advance(II, NewCode.size() - 1); + BC.MIB->addAnnotation(*II, "PLTCall", true); ++NumCallsOptimized; } } diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index a74eda8e4a566e..5220d305b838d5 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1054,6 +1054,47 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { return true; } + InstructionListType createIndirectPltCall(const MCInst &DirectCall, + const MCSymbol 
*TargetLocation, + MCContext *Ctx) override { + const bool IsTailCall = isTailCall(DirectCall); + assert((DirectCall.getOpcode() == AArch64::BL || + (DirectCall.getOpcode() == AArch64::B && IsTailCall)) && + "64-bit direct (tail) call instruction expected"); + + InstructionListType Code; + // Code sequence for indirect plt call: + // adrp x16 + // ldr x17, [x16, #] + // blr x17 ; or 'br' for tail calls + + MCInst InstAdrp; + InstAdrp.setOpcode(AArch64::ADRP); + InstAdrp.addOperand(MCOperand::createReg(AArch64::X16)); + InstAdrp.addOperand(MCOperand::createImm(0)); + setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation, + /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE); + Code.emplace_back(InstAdrp); + + MCInst InstLoad; + InstLoad.setOpcode(AArch64::LDRXui); + InstLoad.addOperand(MCOperand::createReg(AArch64::X17)); + InstLoad.addOperand(MCOperand::createReg(AArch64::X16)); + InstLoad.addOperand(MCOperand::createImm(0)); + setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation, + /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC); + Code.emplace_back(InstLoad); + + MCInst InstCall; + InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR); + InstCall.addOperand(MCOperand::createReg(AArch64::X17)); + if (IsTailCall) + setTailCall(InstCall); + Code.emplace_back(InstCall); + + return Code; + } + bool lowerTailCall(MCInst &Inst) override { removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); if (getConditionalTailCall(Inst)) diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index e350e701c7b7ba..515c9a94c58cd4 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -1639,11 +1639,16 @@ class X86MCPlusBuilder : public MCPlusBuilder { return true; } - bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation, - MCContext *Ctx) override { - assert((Inst.getOpcode() == X86::CALL64pcrel32 || - (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) && + InstructionListType createIndirectPltCall(const MCInst &DirectCall, + const MCSymbol *TargetLocation, + MCContext *Ctx) override { + assert((DirectCall.getOpcode() == X86::CALL64pcrel32 || + (DirectCall.getOpcode() == X86::JMP_4 && isTailCall(DirectCall))) && "64-bit direct (tail) call instruction expected"); + + InstructionListType Code; + // Create a new indirect call by converting the previous direct call. + MCInst Inst = DirectCall; const auto NewOpcode = (Inst.getOpcode() == X86::CALL64pcrel32) ? X86::CALL64m : X86::JMP32m; Inst.setOpcode(NewOpcode); @@ -1664,7 +1669,8 @@ class X86MCPlusBuilder : public MCPlusBuilder { Inst.insert(Inst.begin(), MCOperand::createReg(X86::RIP)); // BaseReg - return true; + Code.emplace_back(Inst); + return Code; } void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override { diff --git a/bolt/test/AArch64/plt-call.test b/bolt/test/AArch64/plt-call.test new file mode 100644 index 00000000000000..da307d4a6c01e6 --- /dev/null +++ b/bolt/test/AArch64/plt-call.test @@ -0,0 +1,15 @@ +// Verify that PLTCall optimization works. 
+ +RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \ +RUN: -o %t -Wl,-q +RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s + +// Call to printf +CHECK: adrp x16, printf@GOT +CHECK: ldr x17, [x16, :lo12:printf@GOT] +CHECK: blr x17 # PLTCall: 1 + +// Call to puts, that was tail-call optimized +CHECK: adrp x16, puts@GOT +CHECK: ldr x17, [x16, :lo12:puts@GOT] +CHECK: br x17 # TAILCALL # PLTCall: 1 diff --git a/bolt/test/Inputs/plt-tailcall.c b/bolt/test/Inputs/plt-tailcall.c new file mode 100644 index 00000000000000..13f6e29c607747 --- /dev/null +++ b/bolt/test/Inputs/plt-tailcall.c @@ -0,0 +1,8 @@ +#include "stub.h" + +int foo(char *c) { + printf(""); + __attribute__((musttail)) return puts(c); +} + +int main() { return foo("a"); } diff --git a/bolt/test/X86/plt-call.test b/bolt/test/X86/plt-call.test new file mode 100644 index 00000000000000..e6ae86c179d279 --- /dev/null +++ b/bolt/test/X86/plt-call.test @@ -0,0 +1,11 @@ +// Verify that PLTCall optimization works. + +RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \ +RUN: -o %t -Wl,-q +RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s + +// Call to printf +CHECK: callq *printf@GOT(%rip) # PLTCall: 1 + +// Call to puts, that was tail-call optimized +CHECK: jmpl *puts@GOT(%rip) # TAILCALL # PLTCall: 1 From 18a8983c36171659cc84f109c2f3c38e6d10d1a3 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 11 Jun 2024 19:39:02 +0100 Subject: [PATCH 22/38] LAA: refactor analyzeLoop to return bool (NFC) (#93824) Avoid wastefully setting CanVecMem in several places in analyzeLoop, complicating the logic, to get the function to return a bool, and set CanVecMem in the caller. --- .../llvm/Analysis/LoopAccessAnalysis.h | 7 ++- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 58 ++++++++----------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index b9f385f4c4b8fa..7a54fe55014be1 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -704,9 +704,10 @@ class LoopAccessInfo { const PredicatedScalarEvolution &getPSE() const { return *PSE; } private: - /// Analyze the loop. - void analyzeLoop(AAResults *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + /// Analyze the loop. Returns true if all memory access in the loop can be + /// vectorized. + bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, + DominatorTree *DT); /// Check if the structure of the loop allows it to be analyzed by this /// pass. diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 93b8d28ef749f3..fd8919fff6ff96 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2389,7 +2389,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { return true; } -void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, +bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { // Holds the Load and Store instructions. @@ -2430,10 +2430,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // With both a non-vectorizable memory instruction and a convergent // operation, found in this loop, no reason to continue the search. 
- if (HasComplexMemInst && HasConvergentOp) { - CanVecMem = false; - return; - } + if (HasComplexMemInst && HasConvergentOp) + return false; // Avoid hitting recordAnalysis multiple times. if (HasComplexMemInst) @@ -2508,10 +2506,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } // Next instr. } // Next block. - if (HasComplexMemInst) { - CanVecMem = false; - return; - } + if (HasComplexMemInst) + return false; // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -2520,8 +2516,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // care if the pointers are *restrict*. if (!Stores.size()) { LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); - CanVecMem = true; - return; + return true; } MemoryDepChecker::DepCandidates DependentAccesses; @@ -2574,8 +2569,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, LLVM_DEBUG( dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"); - CanVecMem = true; - return; + return true; } for (LoadInst *LD : Loads) { @@ -2622,8 +2616,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // other reads in this loop then is it safe to vectorize. if (NumReadWrites == 1 && NumReads == 0) { LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); - CanVecMem = true; - return; + return true; } // Build dependence sets and check whether we need a runtime pointer bounds @@ -2642,21 +2635,20 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, << "cannot identify array bounds"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); - CanVecMem = false; - return; + return false; } LLVM_DEBUG( dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); - CanVecMem = true; + bool DepsAreSafe = true; if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); - CanVecMem = DepChecker->areDepsSafe(DependentAccesses, - Accesses.getDependenciesToCheck(), - Accesses.getUnderlyingObjects()); + DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, + Accesses.getDependenciesToCheck(), + Accesses.getUnderlyingObjects()); - if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { + if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. @@ -2676,30 +2668,30 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, recordAnalysis("CantCheckMemDepsAtRunTime", I) << "cannot check memory dependencies at runtime"; LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - CanVecMem = false; - return; + return false; } - - CanVecMem = true; + DepsAreSafe = true; } } if (HasConvergentOp) { recordAnalysis("CantInsertRuntimeCheckWithConvergent") - << "cannot add control dependency to convergent operation"; + << "cannot add control dependency to convergent operation"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"); - CanVecMem = false; - return; + return false; } - if (CanVecMem) + if (DepsAreSafe) { LLVM_DEBUG( dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? 
"" : " don't") << " need runtime memory checks.\n"); - else - emitUnsafeDependenceRemark(); + return true; + } + + emitUnsafeDependenceRemark(); + return false; } void LoopAccessInfo::emitUnsafeDependenceRemark() { @@ -3048,7 +3040,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, MaxTargetVectorWidthInBits); PtrRtChecking = std::make_unique(*DepChecker, SE); if (canAnalyzeLoop()) - analyzeLoop(AA, LI, TLI, DT); + CanVecMem = analyzeLoop(AA, LI, TLI, DT); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { From 3af35251c8cd0729674076ab99c64cff6126c270 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Jun 2024 11:54:48 -0700 Subject: [PATCH 23/38] [ProfileData] Simplify InstrProfValueSiteRecord (NFC) (#95143) std::list default-constructs itself as an empty list, so we don't need to call ValueData.clear() in the constructor. --- llvm/include/llvm/ProfileData/InstrProf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index d6831eeaa794b7..dae2caf0181e46 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -797,7 +797,7 @@ struct InstrProfValueSiteRecord { /// Value profiling data pairs at a given value site. std::list ValueData; - InstrProfValueSiteRecord() { ValueData.clear(); } + InstrProfValueSiteRecord() = default; template InstrProfValueSiteRecord(InputIterator F, InputIterator L) : ValueData(F, L) {} From c6d85baf9f12f69915559aff5ed6c48b63daafdd Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Tue, 11 Jun 2024 12:10:54 -0700 Subject: [PATCH 24/38] [mlir][sparse] implement sparse space collapse pass. (#89003) --- .../Dialect/SparseTensor/Transforms/Passes.h | 6 + .../Dialect/SparseTensor/Transforms/Passes.td | 16 ++ .../SparseTensor/Transforms/CMakeLists.txt | 1 + .../Transforms/SparseSpaceCollapse.cpp | 199 ++++++++++++++++++ .../SparseTensor/sparse_space_collapse.mlir | 37 ++++ 5 files changed, 259 insertions(+) create mode 100644 mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp create mode 100644 mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index d6d038ef65bdf4..3043a0c4dc4109 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -248,6 +248,12 @@ std::unique_ptr createSparsificationAndBufferizationPass( bool enableBufferInitialization, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen); +//===----------------------------------------------------------------------===// +// Sparse Iteration Transform Passes +//===----------------------------------------------------------------------===// + +std::unique_ptr createSparseSpaceCollapsePass(); + //===----------------------------------------------------------------------===// // Registration. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 2f844cee5ff528..c6554e1c94a4a4 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -464,4 +464,20 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M ]; } +//===----------------------------------------------------------------------===// +// Sparse Iteration Transform Passes +//===----------------------------------------------------------------------===// + +def SparseSpaceCollapse : Pass<"sparse-space-collapse", "func::FuncOp"> { + let summary = "sparse space collapsing pass"; + let description = [{ + This pass collapses consecutive sparse spaces (extracted from the same tensor) + into one multi-dimensional space. The pass is not yet stablized. + }]; + let constructor = "mlir::createSparseSpaceCollapsePass()"; + let dependentDialects = [ + "sparse_tensor::SparseTensorDialect", + ]; +} + #endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_PASSES diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt index af3a1b48f45af9..2a29ee8a7a87cb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms SparseGPUCodegen.cpp SparseReinterpretMap.cpp SparseStorageSpecifierToLLVM.cpp + SparseSpaceCollapse.cpp SparseTensorCodegen.cpp SparseTensorConversion.cpp SparseTensorPasses.cpp diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp new file mode 100644 index 00000000000000..924046fcd9961f --- /dev/null +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp @@ -0,0 +1,199 @@ +//===--------- SparseSpaceCollapse.cpp - Collapse Sparse Space Pass -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/Transforms/Passes.h" + +#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/Transforms/Passes.h" + +namespace mlir { +#define GEN_PASS_DEF_SPARSESPACECOLLAPSE +#include "mlir/Dialect/SparseTensor/Transforms/Passes.h.inc" +} // namespace mlir + +#define DEBUG_TYPE "sparse-space-collapse" + +using namespace mlir; +using namespace sparse_tensor; + +namespace { + +struct CollapseSpaceInfo { + ExtractIterSpaceOp space; + IterateOp loop; +}; + +bool isCollapsableLoops(LoopLikeOpInterface parent, LoopLikeOpInterface node) { + auto pIterArgs = parent.getRegionIterArgs(); + auto nInitArgs = node.getInits(); + if (pIterArgs.size() != nInitArgs.size()) + return false; + + // Two loops are collapsable if they are perfectly nested. 
+ auto pYields = parent.getYieldedValues(); + auto nResult = node.getLoopResults().value(); + + bool yieldEq = + llvm::all_of(llvm::zip_equal(pYields, nResult), [](auto zipped) { + return std::get<0>(zipped) == std::get<1>(zipped); + }); + + // Parent iter_args should be passed directly to the node's init_args. + bool iterArgEq = + llvm::all_of(llvm::zip_equal(pIterArgs, nInitArgs), [](auto zipped) { + return std::get<0>(zipped) == std::get<1>(zipped); + }); + + return yieldEq && iterArgEq; +} + +bool legalToCollapse(SmallVectorImpl &toCollapse, + ExtractIterSpaceOp curSpace) { + + auto getIterateOpOverSpace = [](ExtractIterSpaceOp space) -> IterateOp { + Value spaceVal = space.getExtractedSpace(); + if (spaceVal.hasOneUse()) + return llvm::dyn_cast(*spaceVal.getUsers().begin()); + return nullptr; + }; + + if (toCollapse.empty()) { + // Collapse root. + if (auto itOp = getIterateOpOverSpace(curSpace)) { + CollapseSpaceInfo &info = toCollapse.emplace_back(); + info.space = curSpace; + info.loop = itOp; + return true; + } + return false; + } + + auto parent = toCollapse.back().space; + auto pItOp = toCollapse.back().loop; + auto nItOp = getIterateOpOverSpace(curSpace); + + // Can only collapse spaces extracted from the same tensor. + if (parent.getTensor() != curSpace.getTensor()) { + LLVM_DEBUG({ + llvm::dbgs() + << "failed to collpase spaces extracted from different tensors."; + }); + return false; + } + + // Can only collapse consecutive simple iteration on one tensor (i.e., no + // coiteration). + if (!nItOp || nItOp->getBlock() != curSpace->getBlock() || + pItOp.getIterator() != curSpace.getParentIter() || + curSpace->getParentOp() != pItOp.getOperation()) { + LLVM_DEBUG( + { llvm::dbgs() << "failed to collapse non-consecutive IterateOps."; }); + return false; + } + + if (pItOp && !isCollapsableLoops(pItOp, nItOp)) { + LLVM_DEBUG({ + llvm::dbgs() + << "failed to collapse IterateOps that are not perfectly nested."; + }); + return false; + } + + CollapseSpaceInfo &info = toCollapse.emplace_back(); + info.space = curSpace; + info.loop = nItOp; + return true; +} + +void collapseSparseSpace(MutableArrayRef toCollapse) { + if (toCollapse.size() < 2) + return; + + ExtractIterSpaceOp root = toCollapse.front().space; + ExtractIterSpaceOp leaf = toCollapse.back().space; + Location loc = root.getLoc(); + + assert(root->hasOneUse() && leaf->hasOneUse()); + + // Insert collapsed operation at the same scope as root operation. + OpBuilder builder(root); + + // Construct the collapsed iteration space. 
+ auto collapsedSpace = builder.create( + loc, root.getTensor(), root.getParentIter(), root.getLoLvl(), + leaf.getHiLvl()); + + auto rItOp = llvm::cast(*root->getUsers().begin()); + auto innermost = toCollapse.back().loop; + + IRMapping mapper; + mapper.map(leaf, collapsedSpace.getExtractedSpace()); + for (auto z : llvm::zip_equal(innermost.getInitArgs(), rItOp.getInitArgs())) + mapper.map(std::get<0>(z), std::get<1>(z)); + + auto cloned = llvm::cast(builder.clone(*innermost, mapper)); + builder.setInsertionPointToStart(cloned.getBody()); + + LevelSet crdUsedLvls; + unsigned shift = 0, argIdx = 1; + for (auto info : toCollapse.drop_back()) { + LevelSet set = info.loop.getCrdUsedLvls(); + crdUsedLvls |= set.lshift(shift); + shift += info.loop.getSpaceDim(); + for (BlockArgument crd : info.loop.getCrds()) { + BlockArgument collapsedCrd = cloned.getBody()->insertArgument( + argIdx++, builder.getIndexType(), crd.getLoc()); + crd.replaceAllUsesWith(collapsedCrd); + } + } + crdUsedLvls |= innermost.getCrdUsedLvls().lshift(shift); + cloned.getIterator().setType(collapsedSpace.getType().getIteratorType()); + cloned.setCrdUsedLvls(crdUsedLvls); + + rItOp.replaceAllUsesWith(cloned.getResults()); + // Erase collapsed loops. + rItOp.erase(); + root.erase(); +} + +struct SparseSpaceCollapsePass + : public impl::SparseSpaceCollapseBase { + SparseSpaceCollapsePass() = default; + + void runOnOperation() override { + func::FuncOp func = getOperation(); + + // A naive (experimental) implementation to collapse consecutive sparse + // spaces. It does NOT handle complex cases where multiple spaces are + // extracted in the same basic block. E.g., + // + // %space1 = extract_space %t1 ... + // %space2 = extract_space %t2 ... + // sparse_tensor.iterate(%sp1) ... + // + SmallVector toCollapse; + func->walk([&](ExtractIterSpaceOp op) { + if (!legalToCollapse(toCollapse, op)) { + // if not legal to collapse one more space, collapse the existing ones + // and clear. 
+ collapseSparseSpace(toCollapse); + toCollapse.clear(); + } + }); + + collapseSparseSpace(toCollapse); + } +}; + +} // namespace + +std::unique_ptr mlir::createSparseSpaceCollapsePass() { + return std::make_unique(); +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir b/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir new file mode 100644 index 00000000000000..baa6199f12bc38 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir @@ -0,0 +1,37 @@ +// RUN: mlir-opt %s --sparse-space-collapse | FileCheck %s + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +// CHECK-LABEL: func.func @sparse_sparse_collapse( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf32, #sparse>, +// CHECK-SAME: %[[VAL_1:.*]]: index) { +// CHECK: %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 to 2 : tensor<4x8xf32, #sparse> +// CHECK: %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]], _) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) +// CHECK: %[[VAL_8:.*]] = "test.op"(%[[VAL_7]]) : (index) -> index +// CHECK: sparse_tensor.yield %[[VAL_8]] : index +// CHECK: } +// CHECK: "test.sink"(%[[VAL_4]]) : (index) -> () +// CHECK: return +// CHECK: } +func.func @sparse_sparse_collapse(%sp : tensor<4x8xf32, #COO>, %i : index) { + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 + : tensor<4x8xf32, #COO> + -> !sparse_tensor.iter_space<#COO, lvls = 0> + %r1 = sparse_tensor.iterate %it1 in %l1 at(%crd0) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index { + %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 + : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1> + -> !sparse_tensor.iter_space<#COO, lvls = 1> + %r2 = sparse_tensor.iterate %it2 in %l2 iter_args(%inner = %outer): !sparse_tensor.iter_space<#COO, lvls = 1 to 2> -> index { + %k ="test.op"(%inner) : (index) -> index + sparse_tensor.yield %k : index + } + sparse_tensor.yield %r2 : index + } + "test.sink"(%r1) : (index) -> () + return +} From 65614605dd99af80cc2218b8a43b0b0f6aebe11a Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 11 Jun 2024 21:23:32 +0200 Subject: [PATCH 25/38] [SPIR-V] Ensure that DuplicatesTracker is working with TypedPointers pointee types (#94952) This PR is a tweak to ensure that DuplicatesTracker is working with TypedPointers pointee types rather than with original llvm's untyped pointers. This enforces DuplicatesTracker promise to avoid emission of several identical OpTypePointer instructions. 
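The mechanics are small enough to show inline. A condensed sketch of the canonicalization the patch applies in both add() and find() (the helper name here is hypothetical; isUntypedPointerTy and getPointerAddressSpace come from SPIRVUtils.h, which the patch now includes): when the pointee is itself an untyped `ptr`, it is rewritten to a typed i8 pointer in the same address space before being used as a key, so both spellings of the same pointer type resolve to one descriptor and hence one OpTypePointer:

  // Canonicalize a pointee type before keying the duplicates tracker.
  const Type *canonicalizePointee(const Type *PointeeTy) {
    if (isUntypedPointerTy(PointeeTy)) // pointee is a plain `ptr`
      PointeeTy = TypedPointerType::get(
          IntegerType::getInt8Ty(PointeeTy->getContext()),
          getPointerAddressSpace(PointeeTy));
    return PointeeTy;
  }

Applying the same rewrite on both insertion and lookup is what guarantees that a find() after an add() lands on the same entry.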
--- .../lib/Target/SPIRV/SPIRVDuplicatesTracker.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h index 2ec3fb35ca0451..3c8405fadd44e9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h @@ -16,6 +16,7 @@ #include "MCTargetDesc/SPIRVBaseInfo.h" #include "MCTargetDesc/SPIRVMCTargetDesc.h" +#include "SPIRVUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -285,10 +286,13 @@ class SPIRVGeneralDuplicatesTracker { TT.add(Ty, MF, R); } - void add(const Type *PointerElementType, unsigned AddressSpace, + void add(const Type *PointeeTy, unsigned AddressSpace, const MachineFunction *MF, Register R) { - ST.add(SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF, - R); + if (isUntypedPointerTy(PointeeTy)) + PointeeTy = + TypedPointerType::get(IntegerType::getInt8Ty(PointeeTy->getContext()), + getPointerAddressSpace(PointeeTy)); + ST.add(SPIRV::PointerTypeDescriptor(PointeeTy, AddressSpace), MF, R); } void add(const Constant *C, const MachineFunction *MF, Register R) { @@ -320,10 +324,13 @@ class SPIRVGeneralDuplicatesTracker { return TT.find(const_cast(Ty), MF); } - Register find(const Type *PointerElementType, unsigned AddressSpace, + Register find(const Type *PointeeTy, unsigned AddressSpace, const MachineFunction *MF) { - return ST.find( - SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF); + if (isUntypedPointerTy(PointeeTy)) + PointeeTy = + TypedPointerType::get(IntegerType::getInt8Ty(PointeeTy->getContext()), + getPointerAddressSpace(PointeeTy)); + return ST.find(SPIRV::PointerTypeDescriptor(PointeeTy, AddressSpace), MF); } Register find(const Constant *C, const MachineFunction *MF) { From 57520985e09f3c098a5f5a6f72e3107a8a1d5446 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 11 Jun 2024 21:23:48 +0200 Subject: [PATCH 26/38] [SPIR-V] Implement insertion of OpGenericCastToPtr using builtin functions (#95055) This PR implements insertion of OpGenericCastToPtr using builtin functions (both opencl `to_global|local|private` and `__spirv_` wrappers), and improves type inference. 
--- llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 138 ++++++++++-------- llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 12 ++ llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 52 +++++-- llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 1 + .../SPIRV/transcoding/OpGenericCastToPtr.ll | 138 ++++++++++++++++++ 5 files changed, 268 insertions(+), 73 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 49838e685a6d2b..6bb3e215240a87 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -300,6 +300,72 @@ lookupBuiltin(StringRef DemangledCall, return nullptr; } +static MachineInstr *getBlockStructInstr(Register ParamReg, + MachineRegisterInfo *MRI) { + // We expect the following sequence of instructions: + // %0:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.alloca) + // or = G_GLOBAL_VALUE @block_literal_global + // %1:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.bitcast), %0 + // %2:_(p4) = G_ADDRSPACE_CAST %1:_(pN) + MachineInstr *MI = MRI->getUniqueVRegDef(ParamReg); + assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST && + MI->getOperand(1).isReg()); + Register BitcastReg = MI->getOperand(1).getReg(); + MachineInstr *BitcastMI = MRI->getUniqueVRegDef(BitcastReg); + assert(isSpvIntrinsic(*BitcastMI, Intrinsic::spv_bitcast) && + BitcastMI->getOperand(2).isReg()); + Register ValueReg = BitcastMI->getOperand(2).getReg(); + MachineInstr *ValueMI = MRI->getUniqueVRegDef(ValueReg); + return ValueMI; +} + +// Return an integer constant corresponding to the given register and +// defined in spv_track_constant. +// TODO: maybe unify with prelegalizer pass. +static unsigned getConstFromIntrinsic(Register Reg, MachineRegisterInfo *MRI) { + MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg); + assert(isSpvIntrinsic(*DefMI, Intrinsic::spv_track_constant) && + DefMI->getOperand(2).isReg()); + MachineInstr *DefMI2 = MRI->getUniqueVRegDef(DefMI->getOperand(2).getReg()); + assert(DefMI2->getOpcode() == TargetOpcode::G_CONSTANT && + DefMI2->getOperand(1).isCImm()); + return DefMI2->getOperand(1).getCImm()->getValue().getZExtValue(); +} + +// Return type of the instruction result from spv_assign_type intrinsic. +// TODO: maybe unify with prelegalizer pass. +static const Type *getMachineInstrType(MachineInstr *MI) { + MachineInstr *NextMI = MI->getNextNode(); + if (!NextMI) + return nullptr; + if (isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_name)) + if ((NextMI = NextMI->getNextNode()) == nullptr) + return nullptr; + Register ValueReg = MI->getOperand(0).getReg(); + if ((!isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_type) && + !isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_ptr_type)) || + NextMI->getOperand(1).getReg() != ValueReg) + return nullptr; + Type *Ty = getMDOperandAsType(NextMI->getOperand(2).getMetadata(), 0); + assert(Ty && "Type is expected"); + return Ty; +} + +static const Type *getBlockStructType(Register ParamReg, + MachineRegisterInfo *MRI) { + // In principle, this information should be passed to us from Clang via + // an elementtype attribute. However, said attribute requires that + // the function call be an intrinsic, which is not. Instead, we rely on being + // able to trace this to the declaration of a variable: OpenCL C specification + // section 6.12.5 should guarantee that we can do this. 
+ MachineInstr *MI = getBlockStructInstr(ParamReg, MRI); + if (MI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) + return MI->getOperand(1).getGlobal()->getType(); + assert(isSpvIntrinsic(*MI, Intrinsic::spv_alloca) && + "Blocks in OpenCL C must be traceable to allocation site"); + return getMachineInstrType(MI); +} + //===----------------------------------------------------------------------===// // Helper functions for building misc instructions //===----------------------------------------------------------------------===// @@ -1371,6 +1437,14 @@ static bool generateBarrierInst(const SPIRV::IncomingCall *Call, return buildBarrierInst(Call, Opcode, MIRBuilder, GR); } +static bool generateCastToPtrInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildInstr(TargetOpcode::G_ADDRSPACE_CAST) + .addDef(Call->ReturnRegister) + .addUse(Call->Arguments[0]); + return true; +} + static bool generateDotOrFMulInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -1847,68 +1921,6 @@ static bool buildNDRange(const SPIRV::IncomingCall *Call, .addUse(TmpReg); } -static MachineInstr *getBlockStructInstr(Register ParamReg, - MachineRegisterInfo *MRI) { - // We expect the following sequence of instructions: - // %0:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.alloca) - // or = G_GLOBAL_VALUE @block_literal_global - // %1:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.bitcast), %0 - // %2:_(p4) = G_ADDRSPACE_CAST %1:_(pN) - MachineInstr *MI = MRI->getUniqueVRegDef(ParamReg); - assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST && - MI->getOperand(1).isReg()); - Register BitcastReg = MI->getOperand(1).getReg(); - MachineInstr *BitcastMI = MRI->getUniqueVRegDef(BitcastReg); - assert(isSpvIntrinsic(*BitcastMI, Intrinsic::spv_bitcast) && - BitcastMI->getOperand(2).isReg()); - Register ValueReg = BitcastMI->getOperand(2).getReg(); - MachineInstr *ValueMI = MRI->getUniqueVRegDef(ValueReg); - return ValueMI; -} - -// Return an integer constant corresponding to the given register and -// defined in spv_track_constant. -// TODO: maybe unify with prelegalizer pass. -static unsigned getConstFromIntrinsic(Register Reg, MachineRegisterInfo *MRI) { - MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg); - assert(isSpvIntrinsic(*DefMI, Intrinsic::spv_track_constant) && - DefMI->getOperand(2).isReg()); - MachineInstr *DefMI2 = MRI->getUniqueVRegDef(DefMI->getOperand(2).getReg()); - assert(DefMI2->getOpcode() == TargetOpcode::G_CONSTANT && - DefMI2->getOperand(1).isCImm()); - return DefMI2->getOperand(1).getCImm()->getValue().getZExtValue(); -} - -// Return type of the instruction result from spv_assign_type intrinsic. -// TODO: maybe unify with prelegalizer pass. -static const Type *getMachineInstrType(MachineInstr *MI) { - MachineInstr *NextMI = MI->getNextNode(); - if (isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_name)) - NextMI = NextMI->getNextNode(); - Register ValueReg = MI->getOperand(0).getReg(); - if (!isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_type) || - NextMI->getOperand(1).getReg() != ValueReg) - return nullptr; - Type *Ty = getMDOperandAsType(NextMI->getOperand(2).getMetadata(), 0); - assert(Ty && "Type is expected"); - return Ty; -} - -static const Type *getBlockStructType(Register ParamReg, - MachineRegisterInfo *MRI) { - // In principle, this information should be passed to us from Clang via - // an elementtype attribute. 
However, said attribute requires that - // the function call be an intrinsic, which is not. Instead, we rely on being - // able to trace this to the declaration of a variable: OpenCL C specification - // section 6.12.5 should guarantee that we can do this. - MachineInstr *MI = getBlockStructInstr(ParamReg, MRI); - if (MI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) - return MI->getOperand(1).getGlobal()->getType(); - assert(isSpvIntrinsic(*MI, Intrinsic::spv_alloca) && - "Blocks in OpenCL C must be traceable to allocation site"); - return getMachineInstrType(MI); -} - // TODO: maybe move to the global register. static SPIRVType * getOrCreateSPIRVDeviceEventPointer(MachineIRBuilder &MIRBuilder, @@ -2322,6 +2334,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateAtomicFloatingInst(Call.get(), MIRBuilder, GR); case SPIRV::Barrier: return generateBarrierInst(Call.get(), MIRBuilder, GR); + case SPIRV::CastToPtr: + return generateCastToPtrInst(Call.get(), MIRBuilder); case SPIRV::Dot: return generateDotOrFMulInst(Call.get(), MIRBuilder, GR); case SPIRV::Wave: diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index edc9e1a33d9f5a..2edd2992425bd0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -59,6 +59,7 @@ def IntelSubgroups : BuiltinGroup; def AtomicFloating : BuiltinGroup; def GroupUniform : BuiltinGroup; def KernelClock : BuiltinGroup; +def CastToPtr : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -595,6 +596,17 @@ defm : DemangledNativeBuiltin<"__spirv_GroupWaitEvents", OpenCL_std, AsyncCopy, defm : DemangledNativeBuiltin<"__spirv_Load", OpenCL_std, LoadStore, 1, 3, OpLoad>; defm : DemangledNativeBuiltin<"__spirv_Store", OpenCL_std, LoadStore, 2, 4, OpStore>; +// Address Space Qualifier Functions/Pointers Conversion Instructions: +defm : DemangledNativeBuiltin<"to_global", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"to_local", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"to_private", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToGlobal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToLocal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToPrivate", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToGlobal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToLocal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToPrivate", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 7b8e3230bf5534..5c10e04325d515 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -69,7 +69,7 @@ class SPIRVEmitIntrinsics DenseSet AggrStores; // deduce element type of untyped pointers - Type *deduceElementType(Value *I); + Type *deduceElementType(Value *I, bool UnknownElemTypeI8); Type *deduceElementTypeHelper(Value *I); Type *deduceElementTypeHelper(Value *I, std::unordered_set &Visited); Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand, @@ -105,7 +105,8 @@ class SPIRVEmitIntrinsics void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B); void processInstrAfterVisit(Instruction *I, IRBuilder<> &B); - void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B); + bool insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B, + bool UnknownElemTypeI8); void insertAssignTypeIntrs(Instruction *I, IRBuilder<> &B); void insertAssignPtrTypeTargetExt(TargetExtType *AssignedType, Value *V, IRBuilder<> &B); @@ -367,6 +368,23 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( if (Ty) break; } + } else if (auto *CI = dyn_cast(I)) { + static StringMap ResTypeByArg = { + {"to_global", 0}, + {"to_local", 0}, + {"to_private", 0}, + {"__spirv_GenericCastToPtr_ToGlobal", 0}, + {"__spirv_GenericCastToPtr_ToLocal", 0}, + {"__spirv_GenericCastToPtr_ToPrivate", 0}}; + // TODO: maybe improve performance by caching demangled names + if (Function *CalledF = CI->getCalledFunction()) { + std::string DemangledName = + getOclOrSpirvBuiltinDemangledName(CalledF->getName()); + auto AsArgIt = ResTypeByArg.find(DemangledName); + if (AsArgIt != ResTypeByArg.end()) + Ty = deduceElementTypeHelper(CI->getArgOperand(AsArgIt->second), + Visited); + } } // remember the found relationship @@ -460,10 +478,10 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper( return OrigTy; } -Type *SPIRVEmitIntrinsics::deduceElementType(Value *I) { +Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) { if (Type *Ty = deduceElementTypeHelper(I)) return Ty; - return IntegerType::getInt8Ty(I->getContext()); + return UnknownElemTypeI8 ? IntegerType::getInt8Ty(I->getContext()) : nullptr; } // If the Instruction has Pointer operands with unresolved types, this function @@ -1152,16 +1170,23 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV, B.CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV); } -void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I, - IRBuilder<> &B) { +// Return true, if we can't decide what is the pointee type now and will get +// back to the question later. Return false is spv_assign_ptr_type is not needed +// or can be inserted immediately. 
+bool SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I, + IRBuilder<> &B, + bool UnknownElemTypeI8) { reportFatalOnTokenType(I); if (!isPointerTy(I->getType()) || !requireAssignType(I) || isa(I)) - return; + return false; setInsertPointAfterDef(B, I); - Type *ElemTy = deduceElementType(I); - buildAssignPtr(B, ElemTy, I); + if (Type *ElemTy = deduceElementType(I, UnknownElemTypeI8)) { + buildAssignPtr(B, ElemTy, I); + return false; + } + return true; } void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, @@ -1199,7 +1224,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, buildAssignPtr(B, PType->getElementType(), Op); } else if (isPointerTy(OpTy)) { Type *ElemTy = GR->findDeducedElementType(Op); - buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op), Op); + buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op, true), Op); } else { CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, {OpTy}, Op, Op, {}, B); @@ -1395,10 +1420,15 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { if (isConvergenceIntrinsic(I)) continue; - insertAssignPtrTypeIntrs(I, B); + bool Postpone = insertAssignPtrTypeIntrs(I, B, false); + // if Postpone is true, we can't decide on pointee type yet insertAssignTypeIntrs(I, B); insertPtrCastOrAssignTypeInstr(I, B); insertSpirvDecorations(I, B); + // if instruction requires a pointee type set, let's check if we know it + // already, and force it to be i8 if not + if (Postpone && !GR->findAssignPtrTypeInstr(I)) + insertAssignPtrTypeIntrs(I, B, true); } for (auto &I : instructions(Func)) diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 5ccbaf12ddee2e..4383d1c5c0e25d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -339,6 +339,7 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { GR.getSPIRVTypeForVReg(MI.getOperand(1).getReg())); break; case SPIRV::OpPtrCastToGeneric: + case SPIRV::OpGenericCastToPtr: validateAccessChain(STI, MRI, GR, MI); break; case SPIRV::OpInBoundsPtrAccessChain: diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll new file mode 100644 index 00000000000000..e3a82b3577701b --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll @@ -0,0 +1,138 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: %[[#Char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#GlobalCharPtr:]] = OpTypePointer CrossWorkgroup %[[#Char]] +; CHECK-SPIRV-DAG: %[[#LocalCharPtr:]] = OpTypePointer Workgroup %[[#Char]] +; CHECK-SPIRV-DAG: %[[#PrivateCharPtr:]] = OpTypePointer Function %[[#Char]] +; CHECK-SPIRV-DAG: %[[#GenericCharPtr:]] = OpTypePointer Generic %[[#Char]] + +; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#GlobalIntPtr:]] = OpTypePointer CrossWorkgroup %[[#Int]] +; CHECK-SPIRV-DAG: %[[#PrivateIntPtr:]] = OpTypePointer Function %[[#Int]] +; CHECK-SPIRV-DAG: %[[#GenericIntPtr:]] = OpTypePointer Generic %[[#Int]] + +%id = type { %arr } +%arr = type { [1 x i64] } + +@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64> + +; Mangling + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: 
OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateCharPtr]] +; CHECK-SPIRV: OpFunctionEnd + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateCharPtr]] +; CHECK-SPIRV: OpFunctionEnd + +define spir_kernel void @test1(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) %var1, i32 5) + %L = call spir_func ptr addrspace(3) @_Z32__spirv_GenericCastToPtr_ToLocalPvi(ptr addrspace(4) %var2, i32 4) + %P = call spir_func ptr @_Z34__spirv_GenericCastToPtr_ToPrivatePvi(ptr addrspace(4) %var3, i32 7) + ret void +} + +define spir_kernel void @test2(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @_Z9to_globalPv(ptr addrspace(4) %var1) + %L = call spir_func ptr addrspace(3) @_Z8to_localPv(ptr addrspace(4) %var2) + %P = call spir_func ptr @_Z10to_privatePv(ptr addrspace(4) %var3) + ret void +} + +declare spir_func ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4), i32) +declare spir_func ptr addrspace(3) @_Z32__spirv_GenericCastToPtr_ToLocalPvi(ptr addrspace(4), i32) +declare spir_func ptr @_Z34__spirv_GenericCastToPtr_ToPrivatePvi(ptr addrspace(4), i32) + +declare spir_func ptr addrspace(1) @_Z9to_globalPv(ptr addrspace(4)) +declare spir_func ptr addrspace(3) @_Z8to_localPv(ptr addrspace(4)) +declare spir_func ptr @_Z10to_privatePv(ptr addrspace(4)) + +; No mangling + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateIntPtr]] +; CHECK-SPIRV: OpFunctionEnd + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] 
+; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateIntPtr]] +; CHECK-SPIRV: OpFunctionEnd + +define spir_kernel void @test3(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @__spirv_GenericCastToPtr_ToGlobal(ptr addrspace(4) %var1, i32 5) + %L = call spir_func ptr addrspace(3) @__spirv_GenericCastToPtr_ToLocal(ptr addrspace(4) %var2, i32 4) + %P = call spir_func ptr @__spirv_GenericCastToPtr_ToPrivate(ptr addrspace(4) %var3, i32 7) + ret void +} + +define spir_kernel void @test4(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @to_global(ptr addrspace(4) %var1) + %L = call spir_func ptr addrspace(3) @to_local(ptr addrspace(4) %var2) + %P = call spir_func ptr @to_private(ptr addrspace(4) %var3) + ret void +} + +declare spir_func ptr addrspace(1) @__spirv_GenericCastToPtr_ToGlobal(ptr addrspace(4), i32) +declare spir_func ptr addrspace(3) @__spirv_GenericCastToPtr_ToLocal(ptr addrspace(4), i32) +declare spir_func ptr @__spirv_GenericCastToPtr_ToPrivate(ptr addrspace(4), i32) + +declare spir_func ptr addrspace(1) @to_global(ptr addrspace(4)) +declare spir_func ptr addrspace(3) @to_local(ptr addrspace(4)) +declare spir_func ptr @to_private(ptr addrspace(4)) From 6afbda7130c343be34b2f3c765b9c4c1b251b671 Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Tue, 11 Jun 2024 21:51:28 +0200 Subject: [PATCH 27/38] [lld-macho] Fix duplicate GOT entries for personality functions (#95054) As stated in `UnwindInfoSectionImpl::prepareRelocations`'s comments, the unwind info uses section+addend relocations for personality functions defined in the same file as the function itself. As personality functions are always accessed via the GOT, we need to resolve those to a symbol. Previously, we did this by keeping a map which resolves these to symbols, creating a synthetic symbol if we didn't find it in the map. This approach has an issue: if we process the object file containing the personality function before any external uses, the entry in the map remains unpopulated, so we create a synthetic symbol and a corresponding GOT entry. If we encounter a relocation to it in a later file which requires GOT (such as in `__eh_frame`), we add that symbol to the GOT, too, effectively creating two entries which point to the same piece of code. 
This commit fixes that by searching the personality function's section for a symbol at that offset which already has a GOT entry, and only creating a synthetic symbol if there is none. As all non-unwind sections are already processed by this point, it ensures no duplication. This should only really affect our tests (and make them clearer), as personality functions are usually defined in platform runtime libraries. Or even if they are local, they are likely not in the first object file to be linked. --- lld/MachO/UnwindInfoSection.cpp | 34 +++++++++++++------ ...-unwind-both-local-and-dylib-personality.s | 15 ++++---- lld/test/MachO/compact-unwind.s | 2 +- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 0ac2f39a6180c7..7033481d6014b5 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -298,19 +298,31 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { assert(!isCoalescedWeak(referentIsec)); // Personality functions can be referenced via section relocations // if they live in the same object file. Create placeholder synthetic - // symbols for them in the GOT. + // symbols for them in the GOT. If the corresponding symbol is already + // in the GOT, use that to avoid creating a duplicate entry. All GOT + // entries needed by non-unwind sections will have already been added + // by this point. Symbol *&s = personalityTable[{referentIsec, r.addend}]; if (s == nullptr) { - // This runs after dead stripping, so the noDeadStrip argument does not - // matter. - s = make("", /*file=*/nullptr, referentIsec, - r.addend, /*size=*/0, /*isWeakDef=*/false, - /*isExternal=*/false, /*isPrivateExtern=*/false, - /*includeInSymtab=*/true, - /*isReferencedDynamically=*/false, - /*noDeadStrip=*/false); - s->used = true; - in.got->addEntry(s); + Defined *const *gotEntry = + llvm::find_if(referentIsec->symbols, [&](Defined const *d) { + return d->value == static_cast(r.addend) && + d->isInGot(); + }); + if (gotEntry != referentIsec->symbols.end()) { + s = *gotEntry; + } else { + // This runs after dead stripping, so the noDeadStrip argument does + // not matter. 
+ s = make("", /*file=*/nullptr, referentIsec, + r.addend, /*size=*/0, /*isWeakDef=*/false, + /*isExternal=*/false, /*isPrivateExtern=*/false, + /*includeInSymtab=*/true, + /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); + s->used = true; + in.got->addEntry(s); + } } r.referent = s; r.addend = 0; diff --git a/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s b/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s index 676577d6b17e9f..35f39ba5fb1e21 100644 --- a/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s +++ b/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s @@ -42,19 +42,20 @@ # RUN: llvm-objdump --macho --indirect-symbols --unwind-info --bind %t/d.out | FileCheck %s --check-prefixes=D -D#%x,OFF=0x100000000 -# A: Indirect symbols for (__DATA_CONST,__got) +# A: Indirect symbols for (__DATA_CONST,__got) 4 entries # A-NEXT: address index name # A: 0x[[#%x,GXX_PERSONALITY_LO:]] [[#]] ___gxx_personality_v0 +# A: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 +# A: 0x[[#%x,PERSONALITY_2:]] [[#]] _personality_2 # A: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 -# A: 0x[[#%x,PERSONALITY_1:]] LOCAL -# A: 0x[[#%x,PERSONALITY_2:]] LOCAL # BC: Indirect symbols for (__DATA_CONST,__got) # BC-NEXT: address index name -# C: 0x[[#%x,GXX_PERSONALITY_HI:]] LOCAL # BC: 0x[[#%x,GXX_PERSONALITY_LO:]] LOCAL -# BC: 0x[[#%x,PERSONALITY_1:]] LOCAL -# BC: 0x[[#%x,PERSONALITY_2:]] LOCAL +# C: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 +# BC: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 +# BC: 0x[[#%x,PERSONALITY_2:]] [[#]] _personality_2 +# BC-EMPTY: # CHECK: Personality functions: (count = 3) # CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY_LO-OFF]] @@ -66,7 +67,7 @@ # A-NEXT: __DATA_CONST __got 0x[[#GXX_PERSONALITY_LO-0]] pointer 0 libc++abi ___gxx_personality_v0 -# D: Indirect symbols for (__DATA_CONST,__got) +# D: Indirect symbols for (__DATA_CONST,__got) 6 entries # D-NEXT: address index name # D: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 # D: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 diff --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s index fa73ccb10a32a2..27e4b44dc0b09f 100644 --- a/lld/test/MachO/compact-unwind.s +++ b/lld/test/MachO/compact-unwind.s @@ -29,7 +29,7 @@ # FIRST: Indirect symbols for (__DATA_CONST,__got) # FIRST-NEXT: address index name # FIRST-DAG: 0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0 -# FIRST-DAG: 0x[[#%x,MY_PERSONALITY:]] LOCAL +# FIRST-DAG: 0x[[#%x,MY_PERSONALITY:]] # SECOND: Indirect symbols for (__DATA_CONST,__got) # SECOND-NEXT: address index name From 163d036d64609bf59183664aec244da5078dc1f1 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 11 Jun 2024 21:56:39 +0200 Subject: [PATCH 28/38] [SPIR-V] Validate and fix bit width of scalar registers (#95147) This PR improves legalization process of SPIR-V instructions. Namely, it introduces validation and fixing of bit width of scalar registers as a part of pre-legalizer. A test case is added that demonstrates ability to legalize instructions with non 8/16/32/64 bit width both with and without vendor-specific SPIR-V extension (SPV_INTEL_arbitrary_precision_integers). In the case of absence of the extension, a generated SPIR-V code will fallback to 8/16/32/64 bit width in OpTypeInt, but SPIR-V Backend still is able to legalize operations with original integer sizes. 
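The widening rule is simple enough to state standalone. A self-contained sketch (plain C++20; std::bit_ceil stands in for LLVM's `1u << Log2_32_Ceil(Sz)`): any scalar width other than 1 is rounded up to the next power of two and clamped to the [8, 64] range, so i6 becomes i8 while i40 and i50 both become i64, and i1 is left alone:

  #include <algorithm>
  #include <bit>

  // Round a non-standard scalar bit width up to the next power of two,
  // clamped to [8, 64]; 1-bit values are kept as-is.
  unsigned widenedWidth(unsigned Sz) {
    if (Sz == 1)
      return 1;
    return std::min(std::max(std::bit_ceil(Sz), 8u), 64u);
  }

This mirrors what the new test below checks: i40 lowers to OpTypeInt 64 when the arbitrary-precision extension is unavailable, while with the extension the original OpTypeInt 40 is preserved.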
--- llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 25 +++++++--
 .../CodeGen/SPIRV/trunc-nonstd-bitwidth.ll | 56 +++++++++++++++++++
 2 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index adc5b36af6f182..53e0432192ca91 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -271,6 +271,21 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
   return SpirvTy;
 }
 
+// To support the current approach and its limitations with respect to bit
+// width, widen a scalar register with a bit width greater than 1 to the next
+// valid size, capped at 64 bits.
+static void widenScalarLLTNextPow2(Register Reg, MachineRegisterInfo &MRI) {
+  LLT RegType = MRI.getType(Reg);
+  if (!RegType.isScalar())
+    return;
+  unsigned Sz = RegType.getScalarSizeInBits();
+  if (Sz == 1)
+    return;
+  unsigned NewSz = std::min(std::max(1u << Log2_32_Ceil(Sz), 8u), 64u);
+  if (NewSz != Sz)
+    MRI.setType(Reg, LLT::scalar(NewSz));
+}
+
 static std::pair<Register, unsigned> createNewIdReg(SPIRVType *SpvType, Register SrcReg,
                                  MachineRegisterInfo &MRI,
                                  const SPIRVGlobalRegistry &GR) {
@@ -406,6 +421,11 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
     MachineInstr &MI = *MII;
     unsigned MIOp = MI.getOpcode();
 
+    // Validate the bit width of scalar registers.
+    for (const auto &MOP : MI.operands())
+      if (MOP.isReg())
+        widenScalarLLTNextPow2(MOP.getReg(), MRI);
+
     if (isSpvIntrinsic(MI, Intrinsic::spv_assign_ptr_type)) {
       Register Reg = MI.getOperand(1).getReg();
       MIB.setInsertPt(*MI.getParent(), MI.getIterator());
@@ -475,11 +495,6 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
       insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI);
     } else if (MIOp == TargetOpcode::G_GLOBAL_VALUE) {
       propagateSPIRVType(&MI, GR, MRI, MIB);
-    } else if (MIOp == TargetOpcode::G_BITREVERSE) {
-      Register Reg = MI.getOperand(0).getReg();
-      LLT RegType = MRI.getType(Reg);
-      if (RegType.getSizeInBits() < 32)
-        MRI.setType(Reg, LLT::scalar(32));
     }
 
     if (MII == Begin)
diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll
new file mode 100644
index 00000000000000..437e161864eca5
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll
@@ -0,0 +1,56 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT
+
+; CHECK-DAG: OpName %[[#Struct:]] "struct"
+; CHECK-DAG: OpName %[[#Arg:]] "arg"
+; CHECK-DAG: OpName %[[#QArg:]] "qarg"
+; CHECK-DAG: OpName %[[#R:]] "r"
+; CHECK-DAG: OpName %[[#Q:]] "q"
+; CHECK-DAG: OpName %[[#Tr:]] "tr"
+; CHECK-DAG: OpName %[[#Tq:]] "tq"
+; CHECK-DAG: %[[#Struct]] = OpTypeStruct %[[#]] %[[#]] %[[#]]
+; CHECK-DAG: %[[#PtrStruct:]] = OpTypePointer CrossWorkgroup %[[#Struct]]
+; CHECK-EXT-DAG: %[[#Int40:]] = OpTypeInt 40 0
+; CHECK-EXT-DAG: %[[#Int50:]] = OpTypeInt 50 0
+; CHECK-NOEXT-DAG: %[[#Int40:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#PtrInt40:]] = OpTypePointer CrossWorkgroup %[[#Int40]]
+
+; CHECK: OpFunction
+
+; CHECK-EXT: %[[#Tr]] = OpUConvert %[[#Int40]] %[[#R]]
+; CHECK-EXT: %[[#Store:]] = OpInBoundsPtrAccessChain %[[#PtrStruct]] %[[#Arg]] %[[#]]
+; CHECK-EXT: %[[#StoreAsInt40:]] = OpBitcast %[[#PtrInt40]] %[[#Store]]
+; CHECK-EXT: OpStore %[[#StoreAsInt40]] %[[#Tr]]
+
+; CHECK-NOEXT: %[[#Store:]] = OpInBoundsPtrAccessChain %[[#PtrStruct]] %[[#Arg]] %[[#]]
+; CHECK-NOEXT: %[[#StoreAsInt40:]] = OpBitcast %[[#PtrInt40]] %[[#Store]]
+; CHECK-NOEXT: OpStore %[[#StoreAsInt40]] %[[#R]]
+
+; CHECK: OpFunction
+
+; CHECK-EXT: %[[#Tq]] = OpUConvert %[[#Int40]] %[[#Q]]
+; CHECK-EXT: OpStore %[[#QArg]] %[[#Tq]]
+
+; CHECK-NOEXT: OpStore %[[#QArg]] %[[#Q]]
+
+%struct = type <{ i32, i8, [3 x i8] }>
+
+define spir_kernel void @foo(ptr addrspace(1) %arg, i64 %r) {
+  %tr = trunc i64 %r to i40
+  %addr = getelementptr inbounds %struct, ptr addrspace(1) %arg, i64 0
+  store i40 %tr, ptr addrspace(1) %addr
+  ret void
+}
+
+define spir_kernel void @bar(ptr addrspace(1) %qarg, i50 %q) {
+  %tq = trunc i50 %q to i40
+  store i40 %tq, ptr addrspace(1) %qarg
+  ret void
+}
From 982b4b6f4d5ddf04ed5e85aea7074c9b26f29673 Mon Sep 17 00:00:00 2001
From: Dave Lee
Date: Tue, 11 Jun 2024 13:14:59 -0700
Subject: [PATCH 29/38] [lldb] Fix declaration of thread argument in
 CommandObjectThreadStepWithTypeAndScope (#95146)

`thread step-in` (and other step commands) take a `<thread-index>`, not a
`<thread-id>`.

--- lldb/source/Commands/CommandObjectThread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index db96ee2cec383e..bb2be560ebfff3 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -383,7 +383,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
                             eCommandProcessMustBePaused),
         m_step_type(step_type), m_step_scope(step_scope),
         m_class_options("scripted step") {
-    AddSimpleArgumentList(eArgTypeThreadID, eArgRepeatOptional);
+    AddSimpleArgumentList(eArgTypeThreadIndex, eArgRepeatOptional);
 
     if (step_type == eStepTypeScripted) {
       m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2,
From 16f2aa1a2ddfcb99e34da5af9d75eaeeb97bb9ce Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 11 Jun 2024 16:45:12 -0400
Subject: [PATCH 30/38] [libc++] Tweak how we check constraints on
 shared_ptr(nullptr_t) (#94996)

This avoids breaking code that should arguably be valid but technically isn't
after enforcing the constraints on shared_ptr's constructors. A new LWG issue
was filed to fix this in the Standard. This patch applies the expected
resolution of this issue to avoid flip-flopping users whose code should
always be considered valid.

See #93071 for more context.
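To make the problem concrete, this is the kind of deleter the old constraint
broke (a minimal sketch mirroring the regression test added below; the causal
explanation is my reading of the linked discussion): with a generic lambda,
checking invocability with std::nullptr_t can force the body to be
instantiated in order to deduce the return type, and `delete p` is ill-formed
when `p` has type std::nullptr_t, which is a hard error outside the immediate
context. Constraining on _Tp* instead avoids ever instantiating the body with
nullptr_t:

    #include <memory>

    int main() {
      // Generic-lambda deleter: its call operator is a template, so its
      // return type is only known by instantiating the body.
      auto deleter = [](auto p) { delete p; };
      std::shared_ptr<int> ptr(new int, deleter); // accepted again
    }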
--- libcxx/include/__memory/shared_ptr.h | 2 +-
 .../nullptr_t_deleter.pass.cpp | 7 +++----
 .../nullptr_t_deleter_allocator.pass.cpp | 12 ++++++++----
 .../pointer_deleter.pass.cpp | 8 ++++++++
 .../pointer_deleter_allocator.pass.cpp | 8 ++++++++
 5 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h
index 00db96185be7c6..7b5002cb95d32b 100644
--- a/libcxx/include/__memory/shared_ptr.h
+++ b/libcxx/include/__memory/shared_ptr.h
@@ -404,7 +404,7 @@ struct __shared_ptr_deleter_ctor_reqs {
 };
 
 template <class _Dp, class _Tp>
-using __shared_ptr_nullptr_deleter_ctor_reqs = _And<is_move_constructible<_Dp>, __well_formed_deleter<_Dp, nullptr_t> >;
+using __shared_ptr_nullptr_deleter_ctor_reqs = _And<is_move_constructible<_Dp>, __well_formed_deleter<_Dp, _Tp*> >;
 
 #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI)
 #  define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__))
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
index 13340ed5294c05..4ea752b36bd018 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
@@ -32,17 +32,16 @@ int A::count = 0;
 // LWG 3233. Broken requirements for shared_ptr converting constructors
 // https://cplusplus.github.io/LWG/issue3233
 static_assert( std::is_constructible<std::shared_ptr<A>, std::nullptr_t, test_deleter<A> >::value, "");
-static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, bad_deleter>::value, "");
-static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, no_nullptr_deleter>::value, "");
+static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, bad_deleter>::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, no_move_deleter>::value, "");
 
 #if TEST_STD_VER >= 17
-static_assert( std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, test_deleter<A[]> >::value, "");
+static_assert(std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, test_deleter<A[]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, bad_deleter>::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, no_nullptr_deleter>::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, no_move_deleter>::value, "");
 
-static_assert( std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, test_deleter<A[5]> >::value, "");
+static_assert(std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, test_deleter<A[5]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, bad_deleter>::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, no_nullptr_deleter>::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, no_move_deleter>::value, "");
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
index 53ca6fb5b234d4..a479b24c4595ab 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
@@ -33,17 +33,21 @@ int A::count = 0;
 // LWG 3233. Broken requirements for shared_ptr converting constructors
 // https://cplusplus.github.io/LWG/issue3233
 static_assert( std::is_constructible<std::shared_ptr<A>, std::nullptr_t, test_deleter<A>, test_allocator<A> >::value, "");
-static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, bad_deleter, test_allocator<A> >::value, "");
-static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, no_nullptr_deleter, test_allocator<A> >::value, "");
+static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, bad_deleter, test_allocator<A> >::value,
+              "");
 static_assert(!std::is_constructible<std::shared_ptr<A>, std::nullptr_t, no_move_deleter, test_allocator<A> >::value, "");
 
 #if TEST_STD_VER >= 17
-static_assert( std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, test_deleter<A[]>, test_allocator<A[]> >::value, "");
+static_assert(
+    std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, test_deleter<A[]>, test_allocator<A[]> >::value,
+    "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, bad_deleter, test_allocator<A[]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, no_nullptr_deleter, test_allocator<A[]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[]>, std::nullptr_t, no_move_deleter, test_allocator<A[]> >::value, "");
 
-static_assert( std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, test_deleter<A[5]>, test_allocator<A[5]> >::value, "");
+static_assert(
+    std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, test_deleter<A[5]>, test_allocator<A[5]> >::value,
+    "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, bad_deleter, test_allocator<A[5]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, no_nullptr_deleter, test_allocator<A[5]> >::value, "");
 static_assert(!std::is_constructible<std::shared_ptr<A[5]>, std::nullptr_t, no_move_deleter, test_allocator<A[5]> >::value, "");
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
index 562acf56d96fe1..97dd2fcb22d1a3 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
@@ -115,6 +115,14 @@ int main(int, char**)
   }
 #endif // TEST_STD_VER >= 11
 
+#if TEST_STD_VER >= 14
+  {
+    // See https://github.com/llvm/llvm-project/pull/93071#issuecomment-2158494851
+    auto deleter = [](auto pointer) { delete pointer; };
+    std::shared_ptr<int> p(new int, deleter);
+  }
+#endif
+
   test_function_type();
   return 0;
 }
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
index 9dffbcdd59a735..b90c69efd94abb 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
@@ -165,5 +165,13 @@ int main(int, char**)
                                        test_allocator >::value, "");
   }
 
+#if TEST_STD_VER >= 14
+  {
+    // See https://github.com/llvm/llvm-project/pull/93071#issuecomment-2158494851
+    auto deleter = [](auto pointer) { delete pointer; };
+    std::shared_ptr<int> p(new int, deleter, std::allocator<int>());
+  }
+#endif
+
   return 0;
 }
From f638f7b6a7c22bc802bb9620acfa10a00d4217cf Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 11 Jun 2024 16:48:35 -0400
Subject: [PATCH 31/38] [libc++] Update with LWG issue number for shared-ptr
 constructor

--- libcxx/docs/Status/Cxx2cIssues.csv | 1 +
 .../util.smartptr.shared.const/pointer_deleter.pass.cpp | 2 +-
 .../pointer_deleter_allocator.pass.cpp | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv
index 8d24457186310c..28359b7bb49ac4 100644
--- a/libcxx/docs/Status/Cxx2cIssues.csv
+++ b/libcxx/docs/Status/Cxx2cIssues.csv
@@ -65,4 +65,5 @@
 "`3343 <https://wg21.link/LWG3343>`__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0",""
 "XXXX","","The sys_info range should be affected by save","Not Yet Adopted","|Complete|","19.0"
 "`4071 <https://wg21.link/LWG4071>`__","","``reference_wrapper`` comparisons are not SFINAE-friendly","Not Yet Adopted","|Complete|","19.0"
+"`4110 <https://wg21.link/LWG4110>`__","","``shared_ptr(nullptr_t, Deleter)`` is overconstrained, breaking some sensible deleters","Not Yet Adopted","|Complete|","19.0"
 "","","","","",""
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
index 97dd2fcb22d1a3..95dcb92b51993c 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
@@ -117,7 +117,7 @@ int main(int, char**)
 
 #if TEST_STD_VER >= 14
   {
-    // See https://github.com/llvm/llvm-project/pull/93071#issuecomment-2158494851
+    // LWG 4110
     auto deleter = [](auto pointer) { delete pointer; };
     std::shared_ptr<int> p(new int, deleter);
   }
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
index b90c69efd94abb..89e7d0b02d421b 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
@@ -167,7 +167,7 @@ int main(int, char**)
 
 #if TEST_STD_VER >= 14
   {
-    // See https://github.com/llvm/llvm-project/pull/93071#issuecomment-2158494851
+    // LWG 4110
    auto deleter = [](auto pointer) { delete pointer; };
     std::shared_ptr<int> p(new int, deleter, std::allocator<int>());
   }
From 6faae130e4ea34f8b092fdfcd866fbb5861bfc49 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 11 Jun 2024 16:55:56 -0400
Subject: [PATCH 32/38] [libc++] Simplify the definition of string::operator==
 (#95000)

Instead of hardcoding a loop for small strings, always call
char_traits::compare which ends up desugaring to __builtin_memcmp.

Note that the original code dates back 11 years, when we didn't lower to
intrinsics in `char_traits::compare`.
Fixes #94222

--- libcxx/include/string | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index 1db803e822d727..5301f8a87d9bb1 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -3746,17 +3746,10 @@ template <class _Allocator>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
 operator==(const basic_string<char, char_traits<char>, _Allocator>& __lhs,
            const basic_string<char, char_traits<char>, _Allocator>& __rhs) _NOEXCEPT {
-  size_t __lhs_sz = __lhs.size();
-  if (__lhs_sz != __rhs.size())
+  size_t __sz = __lhs.size();
+  if (__sz != __rhs.size())
     return false;
-  const char* __lp = __lhs.data();
-  const char* __rp = __rhs.data();
-  if (__lhs.__is_long())
-    return char_traits<char>::compare(__lp, __rp, __lhs_sz) == 0;
-  for (; __lhs_sz != 0; --__lhs_sz, ++__lp, ++__rp)
-    if (*__lp != *__rp)
-      return false;
-  return true;
+  return char_traits<char>::compare(__lhs.data(), __rhs.data(), __sz) == 0;
 }
 
 #if _LIBCPP_STD_VER <= 17
From cce1feb7b1caf6bd5fab311a787481a3f6a9dcdf Mon Sep 17 00:00:00 2001
From: Nikolas Klauser
Date: Tue, 11 Jun 2024 22:58:00 +0200
Subject: [PATCH 33/38] [libc++][NFC] Remove some dead code in string (#94893)

It looks like the last references got removed in c747bd0e2339. It removed a
__zero() function, which was probably created at some point in the ancient
past to optimize copying the string representation. The __zero() function got
simplified to an assignment as part of making string constexpr, rendering
this code unnecessary.

--- libcxx/include/string | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index 5301f8a87d9bb1..751af8f1476d0d 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -868,23 +868,9 @@ private:
   static_assert(sizeof(__short) == (sizeof(value_type) * (__min_cap + 1)), "__short has an unexpected size.");
 
-  union __ulx {
-    __long __lx;
-    __short __lxx;
-  };
-
-  enum { __n_words = sizeof(__ulx) / sizeof(size_type) };
-
-  struct __raw {
-    size_type __words[__n_words];
-  };
-
-  struct __rep {
-    union {
-      __short __s;
-      __long __l;
-      __raw __r;
-    };
+  union __rep {
+    __short __s;
+    __long __l;
   };
 
   __compressed_pair<__rep, allocator_type> __r_;
From a118f5f398bf099ec76ebf889234ebbc58b28f0c Mon Sep 17 00:00:00 2001
From: Greg Clayton
Date: Tue, 11 Jun 2024 13:58:26 -0700
Subject: [PATCH 34/38] Fix type lookup bug where wrong decl context was being
 used for a DIE. (#94846)

The function that calculated the declaration context for a DIE was
incorrectly traversing across DW_TAG_subprogram DIEs when climbing the parent
DIE chain. This meant that types defined in functions would appear to have
the declaration context of whatever was above the function.

I fixed the GetTypeLookupContextImpl(...) function in DWARFDIE.cpp so that it
no longer transparently skips over functions, lexical blocks, inlined
functions, or compile and type units. Added a test to verify things are
working.
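The corrected walk can be summarized with a short sketch (illustrative types
only; the real code operates on DWARFDIE and CompilerContext):

    #include <string>
    #include <vector>

    // Illustrative stand-ins for the DWARF tags involved.
    enum class Tag { Namespace, Struct, Subprogram, LexicalBlock,
                     InlinedSubroutine, CompileUnit, TypeUnit };

    struct Die {
      Tag tag;
      const Die *parent;
      std::string name;
    };

    // Emit scope names outermost-first. Returning at a boundary tag drops
    // that DIE and everything above it, which is the fix: a type defined in
    // a function no longer inherits the function's enclosing namespace.
    void appendContext(const Die *d, std::vector<std::string> &ctx) {
      if (!d)
        return;
      switch (d->tag) {
      case Tag::Subprogram:
      case Tag::LexicalBlock:
      case Tag::InlinedSubroutine:
      case Tag::CompileUnit:
      case Tag::TypeUnit:
        return;
      default:
        break;
      }
      appendContext(d->parent, ctx);
      ctx.push_back(d->name);
    }

    int main() {
      Die cu{Tag::CompileUnit, nullptr, ""};
      Die ns{Tag::Namespace, &cu, "a"};
      Die fn{Tag::Subprogram, &ns, "foo"};
      Die s{Tag::Struct, &fn, "my_struct"};
      std::vector<std::string> ctx;
      appendContext(&s, ctx); // yields {"my_struct"}, not {"a", "my_struct"}
    }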
--- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 12 ++
 .../API/functionalities/type_types/Makefile | 2 +
 .../type_types/TestFindTypes.py | 66 +++++++++++
 .../API/functionalities/type_types/main.cpp | 15 +++
 .../SymbolFile/DWARF/DWARFDIETest.cpp | 107 ++++++++++++++++++
 5 files changed, 202 insertions(+)
 create mode 100644 lldb/test/API/functionalities/type_types/Makefile
 create mode 100644 lldb/test/API/functionalities/type_types/TestFindTypes.py
 create mode 100644 lldb/test/API/functionalities/type_types/main.cpp

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
index 0ef94ed9f17c34..992d814793f9d2 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
@@ -477,6 +477,18 @@ static void GetTypeLookupContextImpl(DWARFDIE die,
     case DW_TAG_base_type:
       push_ctx(CompilerContextKind::Builtin, name);
       break;
+    // If any of the tags below appear in the parent chain, stop building the
+    // decl context and return. Before these cases were handled here, if a
+    // type "a::my_struct" existed in a namespace "a", and a function in that
+    // same namespace "a" contained a type named "my_struct", both types
+    // would return "a::my_struct" as the declaration context, since the
+    // DW_TAG_subprogram would be skipped and its parent would be found.
+    case DW_TAG_compile_unit:
+    case DW_TAG_type_unit:
+    case DW_TAG_subprogram:
+    case DW_TAG_lexical_block:
+    case DW_TAG_inlined_subroutine:
+      return;
     default:
       break;
     }
diff --git a/lldb/test/API/functionalities/type_types/Makefile b/lldb/test/API/functionalities/type_types/Makefile
new file mode 100644
index 00000000000000..3d0b98f13f3d7b
--- /dev/null
+++ b/lldb/test/API/functionalities/type_types/Makefile
@@ -0,0 +1,2 @@
+CXX_SOURCES := main.cpp
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/type_types/TestFindTypes.py b/lldb/test/API/functionalities/type_types/TestFindTypes.py
new file mode 100644
index 00000000000000..42b5c4cfaaf77c
--- /dev/null
+++ b/lldb/test/API/functionalities/type_types/TestFindTypes.py
@@ -0,0 +1,66 @@
+"""
+Test the SBModule and SBTarget type lookup APIs to find multiple types.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TypeFindFirstTestCase(TestBase):
+    def test_find_first_type(self):
+        """
+        Test SBTarget::FindTypes() and SBModule::FindTypes() APIs.
+
+        We had issues where the declaration context used when finding types
+        was incorrectly calculated, so that a type in a namespace and a type
+        in a function inside that same namespace would both match a lookup.
+        For example:
+
+            namespace a {
+              struct Foo {
+                int foo;
+              };
+
+              unsigned foo() {
+                typedef unsigned Foo;
+                Foo foo = 12;
+                return foo;
+              }
+            } // namespace a
+
+
+        Previously LLDB would calculate the declaration context of "a::Foo"
+        correctly, but would incorrectly calculate the declaration context of
+        the "Foo" type inside the foo() function as "a::Foo". These tests
+        ensure this works correctly.
+        """
+        self.build()
+        target = self.createTestTarget()
+        exe_module = target.GetModuleAtIndex(0)
+        self.assertTrue(exe_module.IsValid())
+        # Test the SBTarget and SBModule APIs for FindFirstType
+        for api in [target, exe_module]:
+            # We should find the "a::Foo" but not the "Foo" type in the function.
+            types = api.FindTypes("a::Foo")
+            self.assertEqual(types.GetSize(), 1)
+            type_str0 = str(types.GetTypeAtIndex(0))
+            self.assertIn('struct Foo {', type_str0)
+
+            # When we search by type basename, we should find any type whose
+            # basename matches "Foo", so "a::Foo" and the "Foo" type in the
+            # function.
+            types = api.FindTypes("Foo")
+            self.assertEqual(types.GetSize(), 2)
+            type_str0 = str(types.GetTypeAtIndex(0))
+            type_str1 = str(types.GetTypeAtIndex(1))
+            # We don't know which order the types will come back in, so
+            # compare the first lines as an unordered set.
+            self.assertEqual(set([str(t).split('\n')[0] for t in types]), set(["typedef Foo", "struct Foo {"]))
+
+            # When we search by type basename with "::" prepended, we should
+            # only find types in the root namespace, which means only the
+            # "Foo" type in the function.
+            types = api.FindTypes("::Foo")
+            self.assertEqual(types.GetSize(), 1)
+            type_str0 = str(types.GetTypeAtIndex(0))
+            self.assertIn('typedef Foo', type_str0)
diff --git a/lldb/test/API/functionalities/type_types/main.cpp b/lldb/test/API/functionalities/type_types/main.cpp
new file mode 100644
index 00000000000000..095328932cdc46
--- /dev/null
+++ b/lldb/test/API/functionalities/type_types/main.cpp
@@ -0,0 +1,15 @@
+namespace a {
+struct Foo {};
+
+unsigned foo() {
+  typedef unsigned Foo;
+  Foo foo = 12;
+  return foo;
+}
+} // namespace a
+
+int main() {
+  a::Foo f = {};
+  a::foo();
+  return 0;
+}
diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp
index bea07dfa27cc6a..65da7de1ba2d8a 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp
@@ -258,3 +258,110 @@ TEST(DWARFDIETest, GetContext) {
       struct_die.GetTypeLookupContext(),
       testing::ElementsAre(make_namespace("NAMESPACE"), make_struct("STRUCT")));
 }
+
+TEST(DWARFDIETest, GetContextInFunction) {
+  // Make sure we get the right context for each "struct_t" type. The first
+  // should be "a::struct_t" and the one defined in the "foo" function should
+  // be "struct_t". Previously, DWARFDIE::GetTypeLookupContext() would report
+  // the "struct_t" in "foo" as "a::struct_t" because it would traverse the
+  // entire DIE parent tree, ignoring DW_TAG_subprogram and continuing to the
+  // parents.
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  // 0x0000000c:   DW_TAG_namespace
+  //                 DW_AT_name("a")
+  // 0x0000000f:     DW_TAG_structure_type
+  //                   DW_AT_name("struct_t")
+  // 0x00000019:     DW_TAG_subprogram
+  //                   DW_AT_name("foo")
+  // 0x0000001e:       DW_TAG_structure_type
+  //                     DW_AT_name("struct_t")
+  // 0x00000028:     NULL
+  // 0x00000029:   NULL
+  // 0x0000002a: NULL
+  const char *yamldata = R"(
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_386
+DWARF:
+  debug_str:
+    - ''
+  debug_abbrev:
+    - ID: 0
+      Table:
+        - Code:     0x1
+          Tag:      DW_TAG_compile_unit
+          Children: DW_CHILDREN_yes
+        - Code:     0x2
+          Tag:      DW_TAG_namespace
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form:      DW_FORM_string
+        - Code:     0x3
+          Tag:      DW_TAG_structure_type
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form:      DW_FORM_string
+        - Code:     0x4
+          Tag:      DW_TAG_subprogram
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form:      DW_FORM_string
+  debug_info:
+    - Length:        0x27
+      Version:       4
+      AbbrevTableID: 0
+      AbbrOffset:    0x0
+      AddrSize:      8
+      Entries:
+        - AbbrCode: 0x1
+        - AbbrCode: 0x2
+          Values:
+            - Value: 0xDEADBEEFDEADBEEF
+              CStr:  a
+        - AbbrCode: 0x3
+          Values:
+            - Value: 0xDEADBEEFDEADBEEF
+              CStr:  struct_t
+        - AbbrCode: 0x4
+          Values:
+            - Value: 0xDEADBEEFDEADBEEF
+              CStr:  foo
+        - AbbrCode: 0x3
+          Values:
+            - Value: 0xDEADBEEFDEADBEEF
+              CStr:  struct_t
+        - AbbrCode: 0x0
+        - AbbrCode: 0x0
+        - AbbrCode: 0x0)";
+
+  YAMLModuleTester t(yamldata);
+  auto *symbol_file =
+      llvm::cast<SymbolFileDWARF>(t.GetModule()->GetSymbolFile());
+  DWARFUnit *unit = symbol_file->DebugInfo().GetUnitAtIndex(0);
+  ASSERT_TRUE(unit);
+
+  auto make_namespace = [](llvm::StringRef name) {
+    return CompilerContext(CompilerContextKind::Namespace, ConstString(name));
+  };
+  auto make_struct = [](llvm::StringRef name) {
+    return CompilerContext(CompilerContextKind::Struct, ConstString(name));
+  };
+  // Grab the "a::struct_t" type from the "a" namespace.
+  DWARFDIE a_struct_die = unit->DIE().GetFirstChild().GetFirstChild();
+  ASSERT_TRUE(a_struct_die);
+  EXPECT_THAT(
+      a_struct_die.GetDeclContext(),
+      testing::ElementsAre(make_namespace("a"), make_struct("struct_t")));
+  // Grab the "struct_t" defined in the "foo" function.
+  DWARFDIE foo_struct_die =
+      unit->DIE().GetFirstChild().GetFirstChild().GetSibling().GetFirstChild();
+  EXPECT_THAT(foo_struct_die.GetTypeLookupContext(),
+              testing::ElementsAre(make_struct("struct_t")));
+}
From c6ee5628a75feeb4fccc8272a68eb8303fb1734b Mon Sep 17 00:00:00 2001
From: Justin Bogner
Date: Tue, 11 Jun 2024 14:16:11 -0700
Subject: [PATCH 35/38] [HLSL] Fix FileCheck annotation typos (#95155)

These are the HLSL-specific fixes from #93193. Thanks klensy!
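The failure mode behind these fixes is worth spelling out: FileCheck only
recognizes a directive when the prefix and the trailing colon match exactly,
so a misspelled prefix such as `CHECK-NEXt:` or a `CHECK` with a missing
colon is treated as an ordinary comment and silently skipped. A hypothetical
toy test illustrating this (the function and checks are made up):

    // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s

    void foo() {}

    // CHECK: define {{.*}}void @foo
    // CHECK-NEXt: ret void
    // The line above is not a directive because of the lowercase 't', so it
    // is never checked and the test passes whether or not the output matches.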
--- clang/test/CodeGenHLSL/convergence/for.hlsl | 2 +-
 clang/test/SemaHLSL/standard_conversion_sequences.hlsl | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGenHLSL/convergence/for.hlsl b/clang/test/CodeGenHLSL/convergence/for.hlsl
index 180fae74ba7514..95f9a196bdb676 100644
--- a/clang/test/CodeGenHLSL/convergence/for.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/for.hlsl
@@ -92,7 +92,7 @@ void test6() {
 // CHECK: [[C1:%[a-zA-Z0-9]+]] = call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: br i1 [[C1]], label %if.then, label %if.end
 // CHECK: if.then:
-// CHECK call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK: call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: br label %for.end
 // CHECK: if.end:
 // CHECK: br label %for.inc
diff --git a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
index a0d398105f15d6..256981d2c1e2e0 100644
--- a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
+++ b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
@@ -4,9 +4,8 @@
 void test() {
 
   // CHECK: VarDecl {{.*}} used f3 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))' cinit
-  // CHECK-NEXt: ImplicitCastExpr {{.*}} 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))'
-  // CHECK-NEXt: ImplicitCastExpr {{.*}} 'float'
-  // CHECK-NEXt: FloatingLiteral {{.*}} 'double' 1.000000e+00
+  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))'
+  // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 1.000000e+00
   vector<float,3> f3 = 1.0; // No warning for splatting to a vector from a literal.
From 438a7d4c982e0a38aaa6544a5ba6736d54600733 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Tue, 11 Jun 2024 14:20:58 -0700
Subject: [PATCH 36/38] [mlir][sparse] expose optimization flags to mini
 pipeline (#95158)

Some of the options were only fed into the full sparse pipeline. However,
some backends prefer to use the sparse mini-pipeline. This change exposes the
important optimization flags to that pass as well. This prepares for the
SIMDization of PyTorch sparsified code.

--- .../Dialect/SparseTensor/Transforms/Passes.td | 12 ++++++
 .../SparsificationAndBufferizationPass.cpp | 39 ++++++++++-------
 .../SparseTensor/minipipeline_vector.mlir | 43 +++++++++++++++++++
 3 files changed, 78 insertions(+), 16 deletions(-)
 create mode 100755 mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir

diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index c6554e1c94a4a4..196110f55571d2 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -462,6 +462,18 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M
     "sparse_tensor::SparseTensorDialect",
     "vector::VectorDialect"
   ];
+  // Important optimization options are made visible to the mini-pipeline
+  // so that clients can set these (when not using the full pipeline).
+  let options = [
+    Option<"vectorLength", "vl", "int32_t", "0",
+           "Set the vector length (use 0 to disable vectorization)">,
+    Option<"enableVLAVectorization", "enable-vla-vectorization", "bool", "false",
+           "Enable vector length agnostic vectorization">,
+    Option<"enableSIMDIndex32", "enable-simd-index32", "bool", "false",
+           "Enable i32 indexing into vectors (for efficient gather/scatter)">,
+    Option<"enableGPULibgen", "enable-gpu-libgen", "bool", "false",
+           "Enable GPU acceleration by means of direct library calls">,
+  ];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index 3a8972072ac3b1..13c750e83d0454 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -61,20 +61,34 @@ class SparsificationAndBufferizationPass
     : public impl::SparsificationAndBufferizationBase<
           SparsificationAndBufferizationPass> {
 public:
+  // Private pass options only.
   SparsificationAndBufferizationPass(
       const bufferization::OneShotBufferizationOptions &bufferizationOptions,
       const SparsificationOptions &sparsificationOptions,
       bool createSparseDeallocs, bool enableRuntimeLibrary,
-      bool enableBufferInitialization, unsigned vectorLength,
-      bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen)
+      bool enableBufferInitialization)
       : bufferizationOptions(bufferizationOptions),
         sparsificationOptions(sparsificationOptions),
         createSparseDeallocs(createSparseDeallocs),
        enableRuntimeLibrary(enableRuntimeLibrary),
-        enableBufferInitialization(enableBufferInitialization),
-        vectorLength(vectorLength),
-        enableVLAVectorization(enableVLAVectorization),
-        enableSIMDIndex32(enableSIMDIndex32), enableGPULibgen(enableGPULibgen) {
+        enableBufferInitialization(enableBufferInitialization) {}
+  // Private pass options and visible pass options.
+  SparsificationAndBufferizationPass(
+      const bufferization::OneShotBufferizationOptions &bufferizationOptions,
+      const SparsificationOptions &sparsificationOptions,
+      bool createSparseDeallocs, bool enableRuntimeLibrary,
+      bool enableBufferInitialization, unsigned vl, bool vla, bool index32,
+      bool gpu)
+      : bufferizationOptions(bufferizationOptions),
+        sparsificationOptions(sparsificationOptions),
+        createSparseDeallocs(createSparseDeallocs),
+        enableRuntimeLibrary(enableRuntimeLibrary),
+        enableBufferInitialization(enableBufferInitialization) {
+    // Set the visible pass options explicitly.
+    vectorLength = vl;
+    enableVLAVectorization = vla;
+    enableSIMDIndex32 = index32;
+    enableGPULibgen = gpu;
   }
 
   /// Bufferize all dense ops. This assumes that no further analysis is needed
@@ -178,10 +192,6 @@ class SparsificationAndBufferizationPass
   bool createSparseDeallocs;
   bool enableRuntimeLibrary;
   bool enableBufferInitialization;
-  unsigned vectorLength;
-  bool enableVLAVectorization;
-  bool enableSIMDIndex32;
-  bool enableGPULibgen;
 };
 
 } // namespace sparse_tensor
@@ -213,16 +223,13 @@ mlir::getBufferizationOptionsForSparsification(bool analysisOnly) {
 
 std::unique_ptr<mlir::Pass> mlir::createSparsificationAndBufferizationPass() {
   SparsificationOptions sparseOptions;
-  return createSparsificationAndBufferizationPass(
+  return std::make_unique<
+      mlir::sparse_tensor::SparsificationAndBufferizationPass>(
       getBufferizationOptionsForSparsification(/*analysisOnly=*/false),
       sparseOptions,
      /*createSparseDeallocs=*/false,
       /*enableRuntimeLibrary=*/false,
-      /*enableBufferInitialization=*/false,
-      /*vectorLength=*/0,
-      /*enableVLAVectorization=*/false,
-      /*enableSIMDIndex32=*/false,
-      /*enableGPULibgen=*/false);
+      /*enableBufferInitialization=*/false);
 }
 
 std::unique_ptr<mlir::Pass> mlir::createSparsificationAndBufferizationPass(
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
new file mode 100755
index 00000000000000..2475aa5139da48
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
@@ -0,0 +1,43 @@
+// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s --check-prefix=CHECK-NOVEC
+// RUN: mlir-opt %s --sparsification-and-bufferization="vl=8" | FileCheck %s --check-prefix=CHECK-VEC
+
+// Test to ensure we can pass optimization flags into
+// the mini sparsification and bufferization pipeline.
+
+#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
+
+#trait_sum_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>, // a
+    affine_map<(i) -> ()>   // x (scalar out)
+  ],
+  iterator_types = ["reduction"],
+  doc = "x += SUM_i a(i)"
+}
+
+//
+// CHECK-NOVEC-LABEL: func.func @sum_reduction
+// CHECK-NOVEC:       scf.for
+// CHECK-NOVEC:         arith.addf %{{.*}} %{{.*}} : f32
+// CHECK-NOVEC:       }
+//
+// CHECK-VEC-LABEL: func.func @sum_reduction
+// CHECK-VEC:       vector.insertelement
+// CHECK-VEC:       scf.for
+// CHECK-VEC:         vector.create_mask
+// CHECK-VEC:         vector.maskedload
+// CHECK-VEC:         arith.addf %{{.*}} %{{.*}} : vector<8xf32>
+// CHECK-VEC:       }
+// CHECK-VEC:       vector.reduction
+//
+func.func @sum_reduction(%arga: tensor<?xf32, #SV>,
+                         %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_sum_reduction
+    ins(%arga: tensor<?xf32, #SV>)
+    outs(%argx: tensor<f32>) {
+      ^bb(%a: f32, %x: f32):
+        %0 = arith.addf %x, %a : f32
+        linalg.yield %0 : f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}
From 3fce14569fc3611eddca41db055143285244736a Mon Sep 17 00:00:00 2001
From: William Junda Huang
Date: Tue, 11 Jun 2024 17:33:20 -0400
Subject: [PATCH 37/38] Revert "Add option to generate additional debug info
 for expression dereferencing pointer to pointers. #94100" (#95174)

The option is causing the binary output to be different when compiled under
`-O0`, because it introduces dbg.declare on pseudo variables. Going to change
this implementation to use dbg.value instead.
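For reference, a sketch of the planned direction (an assumption based on the
message above, not code from this revert; the helper name and signature are
illustrative): a dbg.value describes the existing SSA value directly, so no
alloca/store/load sequence is emitted and the `-O0` instruction stream is
left unchanged.

    #include "llvm/IR/DIBuilder.h"

    // Minimal sketch: describe an artificial variable for an intermediate
    // value with dbg.value instead of alloca + store + dbg.declare.
    static void emitPseudoVariable(llvm::DIBuilder &DBuilder,
                                   llvm::DIScope *Scope, llvm::DIType *Type,
                                   llvm::Value *Value,
                                   const llvm::DILocation *Loc,
                                   llvm::Instruction *InsertPt) {
      llvm::DILocalVariable *Var = DBuilder.createAutoVariable(
          Scope, "", /*File=*/nullptr, /*LineNo=*/0, Type,
          /*AlwaysPreserve=*/false, llvm::DINode::FlagArtificial);
      DBuilder.insertDbgValueIntrinsic(Value, Var, DBuilder.createExpression(),
                                       Loc, InsertPt);
    }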
--- clang/lib/CodeGen/CGDebugInfo.cpp | 72 -----------
 clang/lib/CodeGen/CGDebugInfo.h | 6 -
 clang/lib/CodeGen/CGExprScalar.cpp | 21 +--
 .../test/CodeGenCXX/debug-info-ptr-to-ptr.cpp | 120 ------------------
 4 files changed, 1 insertion(+), 218 deletions(-)
 delete mode 100644 clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 11e2d549d8a450..99e12da0081afc 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5746,78 +5746,6 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
-void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
-                                     llvm::Instruction *Value, QualType Ty) {
-  // Only when -g2 or above is specified, debug info for variables will be
-  // generated.
-  if (CGM.getCodeGenOpts().getDebugInfo() <=
-      llvm::codegenoptions::DebugLineTablesOnly)
-    return;
-
-  llvm::DebugLoc SaveDebugLoc = Builder.getCurrentDebugLocation();
-  if (!SaveDebugLoc.get())
-    return;
-
-  llvm::DIFile *Unit = SaveDebugLoc->getFile();
-  llvm::DIType *Type = getOrCreateType(Ty, Unit);
-
-  // Check if Value is already a declared variable and has debug info, in this
-  // case we have nothing to do. Clang emits declared variable as alloca, and
-  // it is loaded upon use, so we identify such pattern here.
-  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Value)) {
-    llvm::Value *Var = Load->getPointerOperand();
-    // There can be implicit type cast applied on a variable if it is an opaque
-    // ptr, in this case its debug info may not match the actual type of object
-    // being used as in the next instruction, so we will need to emit a pseudo
-    // variable for type-casted value.
-    auto DeclareTypeMatches = [&](auto *DbgDeclare) {
-      return DbgDeclare->getVariable()->getType() == Type;
-    };
-    if (any_of(llvm::findDbgDeclares(Var), DeclareTypeMatches) ||
-        any_of(llvm::findDVRDeclares(Var), DeclareTypeMatches))
-      return;
-  }
-
-  // Find the correct location to insert a sequence of instructions to
-  // materialize Value on the stack.
-  auto SaveInsertionPoint = Builder.saveIP();
-  if (llvm::InvokeInst *Invoke = dyn_cast<llvm::InvokeInst>(Value))
-    Builder.SetInsertPoint(Invoke->getNormalDest()->begin());
-  else if (llvm::Instruction *Next = Value->getIterator()->getNextNode())
-    Builder.SetInsertPoint(Next);
-  else
-    Builder.SetInsertPoint(Value->getParent());
-  llvm::DebugLoc DL = Value->getDebugLoc();
-  if (DL.get())
-    Builder.SetCurrentDebugLocation(DL);
-  else if (!Builder.getCurrentDebugLocation().get())
-    Builder.SetCurrentDebugLocation(SaveDebugLoc);
-
-  llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
-  Address PseudoVarAddr(PseudoVar, Value->getType(),
-                        CharUnits::fromQuantity(PseudoVar->getAlign()));
-  llvm::LoadInst *Load = Builder.CreateLoad(PseudoVarAddr);
-  Value->replaceAllUsesWith(Load);
-  Builder.SetInsertPoint(Load);
-  Builder.CreateStore(Value, PseudoVarAddr);
-
-  // Emit debug info for materialized Value.
-  unsigned Line = Builder.getCurrentDebugLocation().getLine();
-  unsigned Column = Builder.getCurrentDebugLocation().getCol();
-  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
-      LexicalBlockStack.back(), "", nullptr, 0, Type, false,
-      llvm::DINode::FlagArtificial);
-  llvm::DILocation *DIL =
-      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
-                            LexicalBlockStack.back(), CurInlinedAt);
-  SmallVector<uint64_t> Expr;
-  DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
-                         Load);
-
-  Builder.restoreIP(SaveInsertionPoint);
-  Builder.SetCurrentDebugLocation(SaveDebugLoc);
-}
-
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index da466837aa3c34..8fe738be215687 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -530,12 +530,6 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
-  /// Emit a pseudo variable and debug info for an intermediate value if it does
-  /// not correspond to a variable in the source code, so that a profiler can
-  /// track more accurate usage of certain instructions of interest.
-  void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
-                          QualType Ty);
-
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 58f0a3113b4f81..1b144c178ce960 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1937,26 +1937,7 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
     }
   }
 
-  llvm::Value *Result = EmitLoadOfLValue(E);
-
-  // If -fdebug-info-for-profiling is specified, emit a pseudo variable and its
-  // debug info for the pointer, even if there is no variable associated with
-  // the pointer's expression.
-  if (CGF.CGM.getCodeGenOpts().DebugInfoForProfiling && CGF.getDebugInfo()) {
-    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
-      if (llvm::GetElementPtrInst *GEP =
-              dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
-        if (llvm::Instruction *Pointer =
-                dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
-          QualType Ty = E->getBase()->getType();
-          if (!E->isArrow())
-            Ty = CGF.getContext().getPointerType(Ty);
-          CGF.getDebugInfo()->EmitPseudoVariable(Builder, Pointer, Ty);
-        }
-      }
-    }
-  }
-  return Result;
+  return EmitLoadOfLValue(E);
 }
 
 Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
diff --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
deleted file mode 100644
index baf791487771c0..00000000000000
--- a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// Test debug info for intermediate value of a chained pointer deferencing
-// expression when the flag -fdebug-info-for-pointer-type is enabled.
-// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu %s -fdebug-info-for-profiling -debug-info-kind=constructor -o - | FileCheck %s - -class A { -public: - int i; - char c; - void *p; - int arr[3]; -}; - -class B { -public: - A* a; -}; - -class C { -public: - B* b; - A* a; - A arr[10]; -}; - -// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func1{{.*}}( -// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.B, ptr {{%.*}}, i32 0, i32 0, !dbg [[DBG1:![0-9]+]] -// CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[A_ADDR]], align {{.*}}, !dbg [[DBG1]] -// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr, align {{.*}}, !dbg [[DBG1]] -// CHECK-NEXT: store ptr [[A]], ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]] -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META1:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1]] -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]] -// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0, -int func1(B *b) { - return b->a->i; -} - -// Should generate a pseudo variable when pointer is type-casted. -// CHECK-LABEL: define dso_local noundef ptr @{{.*}}func2{{.*}}( -// CHECK: call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META2:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: [[B:%.*]] = load ptr, ptr [[B_ADDR]], -// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr, -// CHECK-NEXT: store ptr [[B]], ptr [[PSEUDO1]], -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META3:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], -// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.B, ptr [[TMP1]], i32 0, -A* func2(void *b) { - return ((B*)b)->a; -} - -// Should not generate pseudo variable in this case. 
-// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func3{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META4:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr [[LOCAL1:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression())
-// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr
-int func3(B *b) {
-  A *local1 = b->a;
-  return local1->i;
-}
-
-// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func4{{.*}}(
-// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.C, ptr {{%.*}}, i32 0, i32 1
-// CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[A_ADDR]],
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[A]], ptr [[PSEUDO1]],
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META6:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
-// CHECK: [[CALL:%.*]] = call noundef ptr @{{.*}}foo{{.*}}(
-// CHECK-NEXT: [[PSEUDO2:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[CALL]], ptr [[PSEUDO2]]
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO2]], metadata [[META6]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PSEUDO2]]
-// CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds %class.A, ptr [[TMP2]], i32 0, i32 1
-char func4(C *c) {
-  extern A* foo(int x);
-  return foo(c->a->i)->c;
-}
-
-// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func5{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META7:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META8:![0-9]+]], metadata !DIExpression())
-// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.A, ptr {{%.*}}, i64 {{%.*}},
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[PSEUDO1]],
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META9:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 1,
-char func5(void *arr, int n) {
-  return ((A*)arr)[n].c;
-}
-
-// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func6{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META10:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META11:![0-9]+]], metadata !DIExpression())
-int func6(B &b) {
-  return reinterpret_cast<A&>(b).i;
-}
-
-// CHECK-DAG: [[META_A:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A",
-// CHECK-DAG: [[META_AP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_A]],
-// CHECK-DAG: [[META_B:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B",
-// CHECK-DAG: [[META_BP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_B]],
-// CHECK-DAG: [[META_C:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C",
-// CHECK-DAG: [[META_CP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_C]],
-// CHECK-DAG: [[META_VP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null,
-// CHECK-DAG: [[META_I32:![0-9]+]] = !DIBasicType(name: "int", size: 32,
-// CHECK-DAG: [[META_BR:![0-9]+]] = !DIDerivedType(tag: DW_TAG_reference_type, baseType: [[META_B]],
-
-// CHECK-DAG: [[DBG1]] = !DILocation(line: 34, column: 13,
-// CHECK-DAG: [[META1]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META2]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 46, type: [[META_VP]])
-// CHECK-DAG: [[META3]] = !DILocalVariable(scope: {{.*}}, type: [[META_BP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META4]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 55, type: [[META_BP]])
-// CHECK-DAG: [[META5]] = !DILocalVariable(name: "local1", scope: {{.*}}, file: {{.*}}, line: 56, type: [[META_AP]])
-// CHECK-DAG: [[META6]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META7]] = !DILocalVariable(name: "arr", arg: 1, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_VP]])
-// CHECK-DAG: [[META8]] = !DILocalVariable(name: "n", arg: 2, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_I32]])
-// CHECK-DAG: [[META9]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META10]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 95, type: [[META_BR]])
-// CHECK-DAG: [[META11]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
From cc04bbb2752a0b2a5e7fb41ed1a9d54fbdd3be89 Mon Sep 17 00:00:00 2001
From: ChiaHungDuan
Date: Tue, 11 Jun 2024 14:52:28 -0700
Subject: [PATCH 38/38] [scudo] Fix the calculation of PushedBytesDelta
 (#95177)

BytesInBG is always greater than or equal to BG->BytesInBGAtLastCheckpoint.
Note that the bug only led to unnecessary attempts at page releasing and does
not have a critical impact on correctness.

--- compiler-rt/lib/scudo/standalone/primary64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index bed2ccb8b992a2..8a583bacb4a934 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -1392,7 +1392,7 @@ template <typename Config> class SizeClassAllocator64 {
       continue;
     }
 
-    const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG;
+    const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint;
 
     // Given the randomness property, we try to release the pages only if the
    // bytes used by free blocks exceed certain proportion of group size. Note