diff --git a/benchmarks/config/base/base.json b/benchmarks/config/base/base.json index dbd916265..1c4299353 100644 --- a/benchmarks/config/base/base.json +++ b/benchmarks/config/base/base.json @@ -31,42 +31,42 @@ }, "gemm_fp32_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [] }, "gemm_bf16_dp2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "avx2" ] }, "gemm_bf16_dp4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "svebf16" ] }, "mlp_fp32_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [] }, "mlp_bf16_dp2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "avx2" ] }, "mlp_bf16_dp4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "svebf16" ] @@ -76,28 +76,28 @@ "gemm_models": { "fp32_3x1024_const_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_args_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "bf16_3x1024_const_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100"], "extensions": [ "(avx2|asimd)" ] }, "bf16_3x1024_args_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100"], "extensions": [ "(avx2|asimd)" ] @@ -107,28 +107,28 @@ "mlp_models": { "fp32_3x1024_const_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_args_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "bf16_3x1024_const_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100"], "extensions": [ "(avx2|asimd)" ] }, "bf16_3x1024_args_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ], "environment": {}, "flags": [ "-n", "100"], "extensions": [ "(avx2|asimd)" ] diff --git a/benchmarks/config/fc/1024x1024x512.json b/benchmarks/config/fc/1024x1024x512.json index 15ae8e65d..a19df9cdb 100644 --- a/benchmarks/config/fc/1024x1024x512.json +++ b/benchmarks/config/fc/1024x1024x512.json @@ -37,14 +37,14 @@ "fc_1024x1024x512_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_1024x1024x512_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_1024x1024x512_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/1024x2560x1024.json b/benchmarks/config/fc/1024x2560x1024.json index 9b3978393..8db88e17a 100644 --- a/benchmarks/config/fc/1024x2560x1024.json +++ b/benchmarks/config/fc/1024x2560x1024.json @@ -37,14 +37,14 @@ "fc_1024x2560x1024_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_1024x2560x1024_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_1024x2560x1024_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/1024x352x512.json b/benchmarks/config/fc/1024x352x512.json index b6c3ae347..3502704b1 100644 --- a/benchmarks/config/fc/1024x352x512.json +++ b/benchmarks/config/fc/1024x352x512.json @@ -37,14 +37,14 @@ "fc_1024x352x512_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_1024x352x512_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_1024x352x512_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/1024x512x256.json b/benchmarks/config/fc/1024x512x256.json index b3c1e6117..02526aec2 100644 --- a/benchmarks/config/fc/1024x512x256.json +++ b/benchmarks/config/fc/1024x512x256.json @@ -37,14 +37,14 @@ "fc_1024x512x256_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_1024x512x256_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_1024x512x256_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x1024x1024.json b/benchmarks/config/fc/128x1024x1024.json index 434300b47..a39c766e4 100644 --- a/benchmarks/config/fc/128x1024x1024.json +++ b/benchmarks/config/fc/128x1024x1024.json @@ -37,14 +37,14 @@ "fc_128x1024x1024_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x1024x1024_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x1024x1024_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x1024x4096.json b/benchmarks/config/fc/128x1024x4096.json index 18a20de16..91d3d0d3a 100644 --- a/benchmarks/config/fc/128x1024x4096.json +++ b/benchmarks/config/fc/128x1024x4096.json @@ -37,14 +37,14 @@ "fc_128x1024x4096_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x1024x4096_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x1024x4096_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x3072x768.json b/benchmarks/config/fc/128x3072x768.json index 48ae37f2f..74ef3c736 100644 --- a/benchmarks/config/fc/128x3072x768.json +++ b/benchmarks/config/fc/128x3072x768.json @@ -37,14 +37,14 @@ "fc_128x3072x768_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x3072x768_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x3072x768_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x4096x1024.json b/benchmarks/config/fc/128x4096x1024.json index 7641b6e37..78f909abc 100644 --- a/benchmarks/config/fc/128x4096x1024.json +++ b/benchmarks/config/fc/128x4096x1024.json @@ -37,14 +37,14 @@ "fc_128x4096x1024_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x4096x1024_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x4096x1024_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x768x2304.json b/benchmarks/config/fc/128x768x2304.json index db6c853c1..b136d59cb 100644 --- a/benchmarks/config/fc/128x768x2304.json +++ b/benchmarks/config/fc/128x768x2304.json @@ -37,14 +37,14 @@ "fc_128x768x2304_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x768x2304_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x768x2304_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x768x3072.json b/benchmarks/config/fc/128x768x3072.json index 08a581f2b..187f5b9ab 100644 --- a/benchmarks/config/fc/128x768x3072.json +++ b/benchmarks/config/fc/128x768x3072.json @@ -37,14 +37,14 @@ "fc_128x768x3072_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x768x3072_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x768x3072_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/128x768x768.json b/benchmarks/config/fc/128x768x768.json index e40b0d466..e4b2fb3e6 100644 --- a/benchmarks/config/fc/128x768x768.json +++ b/benchmarks/config/fc/128x768x768.json @@ -37,14 +37,14 @@ "fc_128x768x768_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_128x768x768_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_128x768x768_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x1024x1024.json b/benchmarks/config/fc/256x1024x1024.json index 069314099..9cae42455 100644 --- a/benchmarks/config/fc/256x1024x1024.json +++ b/benchmarks/config/fc/256x1024x1024.json @@ -37,14 +37,14 @@ "fc_256x1024x1024_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x1024x1024_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x1024x1024_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x1024x4096.json b/benchmarks/config/fc/256x1024x4096.json index fa0fd5057..6f3d16eab 100644 --- a/benchmarks/config/fc/256x1024x4096.json +++ b/benchmarks/config/fc/256x1024x4096.json @@ -37,14 +37,14 @@ "fc_256x1024x4096_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x1024x4096_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x1024x4096_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x3072x768.json b/benchmarks/config/fc/256x3072x768.json index b05246d16..66dfbc06d 100644 --- a/benchmarks/config/fc/256x3072x768.json +++ b/benchmarks/config/fc/256x3072x768.json @@ -37,14 +37,14 @@ "fc_256x3072x768_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x3072x768_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x3072x768_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x4096x1024.json b/benchmarks/config/fc/256x4096x1024.json index cf49f386b..5df4f8dde 100644 --- a/benchmarks/config/fc/256x4096x1024.json +++ b/benchmarks/config/fc/256x4096x1024.json @@ -37,14 +37,14 @@ "fc_256x4096x1024_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x4096x1024_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x4096x1024_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x768x3072.json b/benchmarks/config/fc/256x768x3072.json index 6110160e2..a5bde928a 100644 --- a/benchmarks/config/fc/256x768x3072.json +++ b/benchmarks/config/fc/256x768x3072.json @@ -37,14 +37,14 @@ "fc_256x768x3072_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x768x3072_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x768x3072_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/fc/256x768x768.json b/benchmarks/config/fc/256x768x768.json index 7c9d47720..eea447d19 100644 --- a/benchmarks/config/fc/256x768x768.json +++ b/benchmarks/config/fc/256x768x768.json @@ -37,14 +37,14 @@ "fc_256x768x768_fp32_mlir": { "fc_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "fc_256x768x768_bf16_dp2_mlir": { "fc_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "fc_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "fc_256x768x768_bf16_dp4_mlir": { "fc_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "fc_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/1024x1024x512.json b/benchmarks/config/matmul/1024x1024x512.json index 7a84374c1..d042d9932 100644 --- a/benchmarks/config/matmul/1024x1024x512.json +++ b/benchmarks/config/matmul/1024x1024x512.json @@ -37,14 +37,14 @@ "matmul_1024x1024x512_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_1024x1024x512_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_1024x1024x512_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/1024x2560x1024.json b/benchmarks/config/matmul/1024x2560x1024.json index 05ac7b3f1..7d3c79e31 100644 --- a/benchmarks/config/matmul/1024x2560x1024.json +++ b/benchmarks/config/matmul/1024x2560x1024.json @@ -37,14 +37,14 @@ "matmul_1024x2560x1024_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_1024x2560x1024_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_1024x2560x1024_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/1024x352x512.json b/benchmarks/config/matmul/1024x352x512.json index 5ebbb9781..43825729a 100644 --- a/benchmarks/config/matmul/1024x352x512.json +++ b/benchmarks/config/matmul/1024x352x512.json @@ -37,14 +37,14 @@ "matmul_1024x352x512_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_1024x352x512_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_1024x352x512_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/1024x512x256.json b/benchmarks/config/matmul/1024x512x256.json index 0121e3ada..541d492eb 100644 --- a/benchmarks/config/matmul/1024x512x256.json +++ b/benchmarks/config/matmul/1024x512x256.json @@ -37,14 +37,14 @@ "matmul_1024x512x256_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_1024x512x256_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_1024x512x256_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x1024x1024.json b/benchmarks/config/matmul/128x1024x1024.json index 9bf8fea91..4b0e38450 100644 --- a/benchmarks/config/matmul/128x1024x1024.json +++ b/benchmarks/config/matmul/128x1024x1024.json @@ -37,14 +37,14 @@ "matmul_128x1024x1024_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x1024x1024_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x1024x1024_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x1024x4096.json b/benchmarks/config/matmul/128x1024x4096.json index 7fcfbdad5..57fff550d 100644 --- a/benchmarks/config/matmul/128x1024x4096.json +++ b/benchmarks/config/matmul/128x1024x4096.json @@ -37,14 +37,14 @@ "matmul_128x1024x4096_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x1024x4096_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x1024x4096_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x3072x768.json b/benchmarks/config/matmul/128x3072x768.json index ba60eda98..d1e252e29 100644 --- a/benchmarks/config/matmul/128x3072x768.json +++ b/benchmarks/config/matmul/128x3072x768.json @@ -37,14 +37,14 @@ "matmul_128x3072x768_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x3072x768_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x3072x768_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x4096x1024.json b/benchmarks/config/matmul/128x4096x1024.json index 8b9988e87..e23dbfa32 100644 --- a/benchmarks/config/matmul/128x4096x1024.json +++ b/benchmarks/config/matmul/128x4096x1024.json @@ -37,14 +37,14 @@ "matmul_128x4096x1024_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x4096x1024_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x4096x1024_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x768x2304.json b/benchmarks/config/matmul/128x768x2304.json index 743be1a41..f3fc16452 100644 --- a/benchmarks/config/matmul/128x768x2304.json +++ b/benchmarks/config/matmul/128x768x2304.json @@ -37,14 +37,14 @@ "matmul_128x768x2304_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x768x2304_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x768x2304_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=2304,768 --tiles=64,48,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x768x3072.json b/benchmarks/config/matmul/128x768x3072.json index 0d7986c56..d1ec88092 100644 --- a/benchmarks/config/matmul/128x768x3072.json +++ b/benchmarks/config/matmul/128x768x3072.json @@ -37,14 +37,14 @@ "matmul_128x768x3072_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x768x3072_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x768x3072_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=3072,768 --tiles=32,48,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/128x768x768.json b/benchmarks/config/matmul/128x768x768.json index 91653dbb1..5f6e40852 100644 --- a/benchmarks/config/matmul/128x768x768.json +++ b/benchmarks/config/matmul/128x768x768.json @@ -37,14 +37,14 @@ "matmul_128x768x768_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_128x768x768_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_128x768x768_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=128 --layers=768,768 --tiles=32,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x1024x1024.json b/benchmarks/config/matmul/256x1024x1024.json index 828a12f73..14371a52b 100644 --- a/benchmarks/config/matmul/256x1024x1024.json +++ b/benchmarks/config/matmul/256x1024x1024.json @@ -37,14 +37,14 @@ "matmul_256x1024x1024_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x1024x1024_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x1024x1024_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=1024,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x1024x4096.json b/benchmarks/config/matmul/256x1024x4096.json index 9dff5f9f5..e7782042e 100644 --- a/benchmarks/config/matmul/256x1024x4096.json +++ b/benchmarks/config/matmul/256x1024x4096.json @@ -37,14 +37,14 @@ "matmul_256x1024x4096_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x1024x4096_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x1024x4096_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=4096,1024 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x3072x768.json b/benchmarks/config/matmul/256x3072x768.json index 0374ca281..c1dec0325 100644 --- a/benchmarks/config/matmul/256x3072x768.json +++ b/benchmarks/config/matmul/256x3072x768.json @@ -37,14 +37,14 @@ "matmul_256x3072x768_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x3072x768_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x3072x768_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=768,3072 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x4096x1024.json b/benchmarks/config/matmul/256x4096x1024.json index 1d1051066..2ff90c36a 100644 --- a/benchmarks/config/matmul/256x4096x1024.json +++ b/benchmarks/config/matmul/256x4096x1024.json @@ -37,14 +37,14 @@ "matmul_256x4096x1024_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x4096x1024_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x4096x1024_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=1024,4096 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x768x3072.json b/benchmarks/config/matmul/256x768x3072.json index 394647de4..2b8cd3195 100644 --- a/benchmarks/config/matmul/256x768x3072.json +++ b/benchmarks/config/matmul/256x768x3072.json @@ -37,14 +37,14 @@ "matmul_256x768x3072_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x768x3072_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x768x3072_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=3072,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/matmul/256x768x768.json b/benchmarks/config/matmul/256x768x768.json index 157942d4c..93f76fe20 100644 --- a/benchmarks/config/matmul/256x768x768.json +++ b/benchmarks/config/matmul/256x768x768.json @@ -37,14 +37,14 @@ "matmul_256x768x768_fp32_mlir": { "matmul_fp32_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "1" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_fp32_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -54,14 +54,14 @@ "matmul_256x768x768_bf16_dp2_mlir": { "matmul_bf16_dp2_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] }, "matmul_bf16_dp2_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=2 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(avx2|asimd)" ] @@ -71,14 +71,14 @@ "matmul_256x768x768_bf16_dp4_mlir": { "matmul_bf16_dp4_single_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": {}, "flags": [ "-n", "100" ], "extensions": [ "(svebf16)" ] }, "matmul_bf16_dp4_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], + "benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --vnni=4 --batch=256 --layers=768,768 --tiles=64,64,64" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='-def-parallel'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/omp/mlir-bf16.json b/benchmarks/config/omp/mlir-bf16.json index c91946b88..009d280cb 100644 --- a/benchmarks/config/omp/mlir-bf16.json +++ b/benchmarks/config/omp/mlir-bf16.json @@ -3,28 +3,28 @@ "gemm_bf16_dp2_mlir": { "bf16_dp2_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(avx2)" ] @@ -34,28 +34,28 @@ "mlp_bf16_dp2_mlir": { "bf16_dp2_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(avx2)" ] }, "bf16_dp2_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(avx2)" ] @@ -65,28 +65,28 @@ "gemm_bf16_dp4_mlir": { "bf16_dp4_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(svebf16)" ] @@ -96,28 +96,28 @@ "mlp_bf16_dp4_mlir": { "bf16_dp4_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(svebf16)" ] }, "bf16_dp4_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(svebf16)" ] diff --git a/benchmarks/config/omp/mlir-fp32.json b/benchmarks/config/omp/mlir-fp32.json index 1ad34561f..68f1faceb 100644 --- a/benchmarks/config/omp/mlir-fp32.json +++ b/benchmarks/config/omp/mlir-fp32.json @@ -3,28 +3,28 @@ "gemm_fp32_mlir": { "fp32_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(avx2|asimd)" ] @@ -34,28 +34,28 @@ "mlp_fp32_mlir": { "fp32_3x1024_omp_2_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_4_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_8_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ], "extensions": [ "(avx2|asimd)" ] }, "fp32_3x1024_omp_16_mlir": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ], "extensions": [ "(avx2|asimd)" ] diff --git a/benchmarks/driver.py b/benchmarks/driver.py index 04b607180..4dd91577c 100755 --- a/benchmarks/driver.py +++ b/benchmarks/driver.py @@ -36,7 +36,7 @@ }, "irgen": { "type": "IR-GEN", - "benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --vnni=2 --batch=64 --layers=64,64 --tiles=32,32,32" ], + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --vnni=2 --batch=64 --layers=64,64 --tiles=32,32,32" ], "environment": { "OMP_NUM_THREADS": "32" }, "flags": [ "-n", "100" ], "extensions": [ "(avx2|asimd)" ] diff --git a/include/TPP/Transforms/Utils/BuilderUtils.h b/include/TPP/Transforms/Utils/BuilderUtils.h index b3be7ee51..2f25cef85 100644 --- a/include/TPP/Transforms/Utils/BuilderUtils.h +++ b/include/TPP/Transforms/Utils/BuilderUtils.h @@ -21,6 +21,7 @@ class TypeRange; class OpBuilder; class Operation; class Value; +class FloatType; namespace func { class FuncOp; } // namespace func @@ -40,7 +41,7 @@ Value createDenseMemref(OpBuilder &, ModuleOp, TensorInitType, MemRefType, int); // Return a ConstantOp of a certain type with a certain initializer Value getConstIndex(OpBuilder &, int); Value getConstInt(OpBuilder &, int, int); -Value getConstFloat(OpBuilder &, float, int); +Value getConstFloat(OpBuilder &, float, FloatType); // Return a typed attribute of specified type and value. // For integer types, the value is rounded toward zero. diff --git a/lib/TPP/Transforms/Utils/BuilderUtils.cpp b/lib/TPP/Transforms/Utils/BuilderUtils.cpp index 10fbf3a7d..007dd8de5 100644 --- a/lib/TPP/Transforms/Utils/BuilderUtils.cpp +++ b/lib/TPP/Transforms/Utils/BuilderUtils.cpp @@ -58,17 +58,8 @@ Value getConstInt(OpBuilder &builder, int value, int width) { } } -Value getConstFloat(OpBuilder &builder, float value, int width) { - switch (width) { - case 16: - return getConstant(builder, builder.getBF16Type(), value); - case 32: - return getConstant(builder, builder.getF32Type(), value); - case 64: - return getConstant(builder, builder.getF64Type(), value); - default: - assert(false && "Invalid constant float size"); - } +Value getConstFloat(OpBuilder &builder, float value, FloatType type) { + return getConstant(builder, type, value); } Value getConstIndex(OpBuilder &builder, int value) { diff --git a/scripts/debug/README.md b/scripts/debug/README.md index dcab55d2e..23d34a34f 100644 --- a/scripts/debug/README.md +++ b/scripts/debug/README.md @@ -30,7 +30,7 @@ Examples: // Generates an MLP with `mlir-gen`, uses `meld` ./scripts/debug/debug_all_passes.sh \ -b ./build/bin \ - -m "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024" \ + -m "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" \ -d meld // Default behaviour, runs `mlir-gen` & `tpp-opt` without args, uses `diff` diff --git a/test/BF16/Integration/mlir-gen-bf16.mlir b/test/BF16/Integration/mlir-gen-bf16.mlir index eb3f725d1..478d563d8 100644 --- a/test/BF16/Integration/mlir-gen-bf16.mlir +++ b/test/BF16/Integration/mlir-gen-bf16.mlir @@ -1,18 +1,18 @@ // MLP without softmax (can't print packed version for now) -// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10,10 --float-width=16 | tpp-run -e entry -entry-point-result=void +// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10,10 --float-type=bf16 | tpp-run -e entry -entry-point-result=void // Matmul only -// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --float-width=16 | tpp-run -e entry -entry-point-result=void +// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --float-type=bf16 | tpp-run -e entry -entry-point-result=void // Kernel - matmul -// RUN: mlir-gen --kernel=args --seed=123 --float-width=16 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-MATMUL-BF16 +// RUN: mlir-gen --kernel=args --seed=123 --float-type=bf16 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-MATMUL-BF16 // Kernel - fc -// RUN: mlir-gen --kernel=args --bias --relu --seed=123 --float-width=16 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-FC-BF16 +// RUN: mlir-gen --kernel=args --bias --relu --seed=123 --float-type=bf16 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-FC-BF16 // BF16/VNNI execution -// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --tiles=2,2,2 --float-width=16 | tpp-run -e entry -entry-point-result=void -n 10 | FileCheck %s --check-prefix=PERF -// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --tiles=2,2,2 --float-width=16 | tpp-opt --pack-vnni | tpp-run -e entry -entry-point-result=void -n 10 | FileCheck %s --check-prefix=PERF +// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --tiles=2,2,2 --float-type=bf16 | tpp-run -e entry -entry-point-result=void -n 10 | FileCheck %s --check-prefix=PERF +// RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --tiles=2,2,2 --float-type=bf16 | tpp-opt --pack-vnni | tpp-run -e entry -entry-point-result=void -n 10 | FileCheck %s --check-prefix=PERF // GEN-MATMUL-BF16: ( 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 ) diff --git a/test/BF16/Integration/mlir-gen-fc-bf16.mlir b/test/BF16/Integration/mlir-gen-fc-bf16.mlir index 3a5b7f33d..6840e78b6 100644 --- a/test/BF16/Integration/mlir-gen-fc-bf16.mlir +++ b/test/BF16/Integration/mlir-gen-fc-bf16.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=0 2>&1 | FileCheck %s --check-prefix=BF16 -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=2 2>&1 | FileCheck %s --check-prefix=DP2 -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=4 2>&1 | FileCheck %s --check-prefix=DP4 +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=0 2>&1 | FileCheck %s --check-prefix=BF16 +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=2 2>&1 | FileCheck %s --check-prefix=DP2 +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=4 2>&1 | FileCheck %s --check-prefix=DP4 // BF16: // RUN{{.*}}tpp-run %s -n {{\d*}} // BF16: // RUN{{.*}}-e entry -entry-point-result=void diff --git a/test/BF16/Integration/mlir-gen-matmul-bf16.mlir b/test/BF16/Integration/mlir-gen-matmul-bf16.mlir index 67b551582..4bf97ebe3 100644 --- a/test/BF16/Integration/mlir-gen-matmul-bf16.mlir +++ b/test/BF16/Integration/mlir-gen-matmul-bf16.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-gen --kernel=args --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=0 2>&1 | FileCheck %s --check-prefix=BF16 -// RUN: mlir-gen --kernel=args --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=2 2>&1 | FileCheck %s --check-prefix=DP2 -// RUN: mlir-gen --kernel=args --seed=0 --float-width=16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=4 2>&1 | FileCheck %s --check-prefix=DP4 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=0 2>&1 | FileCheck %s --check-prefix=BF16 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=2 2>&1 | FileCheck %s --check-prefix=DP2 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 --vnni=4 2>&1 | FileCheck %s --check-prefix=DP4 // BF16: // RUN{{.*}}tpp-run %s -n {{\d*}} // BF16: // RUN{{.*}}-e entry -entry-point-result=void diff --git a/test/Integration/mlir-gen-fc.mlir b/test/Integration/mlir-gen-fc.mlir index 57431b83f..282f3b842 100644 --- a/test/Integration/mlir-gen-fc.mlir +++ b/test/Integration/mlir-gen-fc.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=FP32 +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=FP32 // FP32: // RUN{{.*}}tpp-run %s -n {{\d*}} // FP32: // RUN{{.*}}-e entry -entry-point-result=void diff --git a/test/Integration/mlir-gen-flops.mlir b/test/Integration/mlir-gen-flops.mlir index 5949917d4..fc02237ea 100644 --- a/test/Integration/mlir-gen-flops.mlir +++ b/test/Integration/mlir-gen-flops.mlir @@ -1,19 +1,19 @@ // Unit sizes -// RUN: mlir-gen --kernel=args --seed=0 --float-width=32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=MATMUL-UNIT -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=FC-UNIT -// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-width=32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=MLP-UNIT +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=MATMUL-UNIT +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=f32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=FC-UNIT +// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-type=f32 --batch=1 --layers=1,1 2>&1 | FileCheck %s --check-prefix=MLP-UNIT // Small sizes -// RUN: mlir-gen --kernel=args --seed=0 --float-width=32 --batch=8 --layers=4,16 2>&1 | FileCheck %s --check-prefix=MATMUL-SMALL -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=32 --batch=8 --layers=4,16 2>&1 | FileCheck %s --check-prefix=FC-SMALL -// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-width=32 --batch=8 --layers=4,8,16 2>&1 | FileCheck %s --check-prefix=MLP-SMALL +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f32 --batch=8 --layers=4,16 2>&1 | FileCheck %s --check-prefix=MATMUL-SMALL +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=f32 --batch=8 --layers=4,16 2>&1 | FileCheck %s --check-prefix=FC-SMALL +// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-type=f32 --batch=8 --layers=4,8,16 2>&1 | FileCheck %s --check-prefix=MLP-SMALL // Large sizes + no tiling -// RUN: mlir-gen --kernel=args --seed=0 --float-width=32 --batch=128 --layers=1024,4096 2>&1 | FileCheck %s --check-prefix=MATMUL-LARGE -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=32 --batch=128 --layers=1024,4096 2>&1 | FileCheck %s --check-prefix=FC-LARGE -// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-width=32 --batch=128 --layers=1024,1024,1024 2>&1 | FileCheck %s --check-prefix=MLP-LARGE +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f32 --batch=128 --layers=1024,4096 2>&1 | FileCheck %s --check-prefix=MATMUL-LARGE +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=f32 --batch=128 --layers=1024,4096 2>&1 | FileCheck %s --check-prefix=FC-LARGE +// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-type=f32 --batch=128 --layers=1024,1024,1024 2>&1 | FileCheck %s --check-prefix=MLP-LARGE // Large sizes + tiling -// RUN: mlir-gen --kernel=args --seed=0 --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=MATMUL-LARGE -// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-width=32 --batch=128 --layers=1024,4096 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=FC-LARGE -// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-width=32 --batch=128 --layers=1024,1024,1024 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=MLP-LARGE +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=MATMUL-LARGE +// RUN: mlir-gen --kernel=args --bias --relu --seed=0 --float-type=f32 --batch=128 --layers=1024,4096 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=FC-LARGE +// RUN: mlir-gen --kernel=const --bias --relu --seed=0 --float-type=f32 --batch=128 --layers=1024,1024,1024 --tiles=64,64,64 2>&1 | FileCheck %s --check-prefix=MLP-LARGE // Validate that flops are computed correctly // MATMUL-UNIT: // BENCH_TOTAL_FLOPS: 2 diff --git a/test/Integration/mlir-gen-matmul.mlir b/test/Integration/mlir-gen-matmul.mlir index d0278c85c..e6c566bda 100644 --- a/test/Integration/mlir-gen-matmul.mlir +++ b/test/Integration/mlir-gen-matmul.mlir @@ -1,4 +1,6 @@ -// RUN: mlir-gen --kernel=args --seed=0 --float-width=32 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=FP32 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f32 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=FP32 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=bf16 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=BF16 +// RUN: mlir-gen --kernel=args --seed=0 --float-type=f16 --batch=128 --layers=2304,768 --tiles=64,48,64 2>&1 | FileCheck %s --check-prefix=FP16 // FP32: // RUN{{.*}}tpp-run %s -n {{\d*}} // FP32: // RUN{{.*}}-e entry -entry-point-result=void @@ -12,3 +14,29 @@ // FP32: arith.mulf // FP32: arith.addf // FP32-NOT: dealloc + +// BF16: // RUN{{.*}}tpp-run %s -n {{\d*}} +// BF16: // RUN{{.*}}-e entry -entry-point-result=void +// BF16: // BENCH_TOTAL_FLOPS: 452984832 +// BF16-DAG: #map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> +// BF16-DAG: #map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)> +// BF16-DAG: #map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> +// BF16: func.func @entry(%arg0: tensor<2x36x64x64xbf16>, %arg1: tensor<16x36x64x48xbf16>, %arg2: tensor<2x16x64x48xbf16>) -> tensor<2x16x64x48xbf16> +// BF16-NOT: alloc +// BF16: linalg.generic {{.*}}iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] +// BF16: arith.mulf +// BF16: arith.addf +// BF16-NOT: dealloc + +// FP16: // RUN{{.*}}tpp-run %s -n {{\d*}} +// FP16: // RUN{{.*}}-e entry -entry-point-result=void +// FP16: // BENCH_TOTAL_FLOPS: 452984832 +// FP16-DAG: #map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> +// FP16-DAG: #map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)> +// FP16-DAG: #map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> +// FP16: func.func @entry(%arg0: tensor<2x36x64x64xf16>, %arg1: tensor<16x36x64x48xf16>, %arg2: tensor<2x16x64x48xf16>) -> tensor<2x16x64x48xf16> +// FP16-NOT: alloc +// FP16: linalg.generic {{.*}}iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] +// FP16: arith.mulf +// FP16: arith.addf +// FP16-NOT: dealloc diff --git a/test/Integration/mlir-gen.mlir b/test/Integration/mlir-gen.mlir index 1ed05c6b9..1f2e4b879 100644 --- a/test/Integration/mlir-gen.mlir +++ b/test/Integration/mlir-gen.mlir @@ -11,10 +11,10 @@ // RUN: mlir-gen --kernel=const --bias --relu --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=CONSTANT // Kernel - matmul -// RUN: mlir-gen --kernel=args --seed=123 --float-width=32 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-MATMUL +// RUN: mlir-gen --kernel=args --seed=123 --float-type=f32 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-MATMUL // Kernel - fc -// RUN: mlir-gen --kernel=args --bias --relu --seed=123 --float-width=32 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-FC +// RUN: mlir-gen --kernel=args --bias --relu --seed=123 --float-type=f32 --batch=10 --layers=10,10 | tpp-run -e entry -entry-point-result=void -print | FileCheck %s --check-prefix=GEN-FC // Packed versions // RUN: mlir-gen --kernel=const --bias --relu --seed=123 --batch=10 --layers=10,10 --tiles=2,2,2 | tpp-run -e entry -entry-point-result=void -n 10 | FileCheck %s --check-prefix=PERF diff --git a/tools/mlir-gen/MLIRGen.cpp b/tools/mlir-gen/MLIRGen.cpp index 0e075c656..cc336f97f 100644 --- a/tools/mlir-gen/MLIRGen.cpp +++ b/tools/mlir-gen/MLIRGen.cpp @@ -19,6 +19,8 @@ #include "MLIRGen.h" #include "llvm/Support/ErrorHandling.h" +#include + using namespace mlir; namespace { @@ -41,7 +43,7 @@ void parseStringList(StringRef str, SmallVector &list) { MLIRGenerator::MLIRGenerator(StringRef kernelStr, unsigned batch, StringRef layersStr, StringRef tilesStr, - unsigned typeWidth, int seed, bool enableBias, + StringRef targetType, int seed, bool enableBias, bool enableRelu, bool enableSoftmax, int vnniBlockingFactor) : builder(&context), loc(builder.getUnknownLoc()), batch(batch), seed(seed), @@ -76,17 +78,13 @@ MLIRGenerator::MLIRGenerator(StringRef kernelStr, unsigned batch, "Must have 3 tile sizes (or none)"); // Pick data type - switch (typeWidth) { - case 32: - dataType = builder.getF32Type(); - break; - case 16: - dataType = builder.getBF16Type(); - break; - default: - assert(false && "Unsupported type width"); - return; - } + auto elementType = llvm::StringSwitch>(targetType) + .CaseLower("f32", builder.getF32Type()) + .CaseLower("f16", builder.getF16Type()) + .CaseLower("bf16", builder.getBF16Type()) + .Default(std::nullopt); + assert(elementType && "Unsupported data type"); + dataType = *elementType; // Disable VNNI packing if it is not BF16 data type if (!dataType.isBF16()) @@ -340,7 +338,7 @@ Value MLIRGenerator::lowerRelu(Value input, Value output) { if (!enableRelu) return input; - auto zero = getConstFloat(builder, 0.0, dataType.getIntOrFloatBitWidth()); + auto zero = getConstFloat(builder, 0.0, dataType.cast()); auto outTy = input.getType().cast(); auto map = getMap(input, MAP_PARALLEL); auto relu = @@ -392,7 +390,7 @@ Value MLIRGenerator::lowerSoftmax(Value input, Value output) { auto redTy = getShape(dims, PACK_OUTPUT); Value redTensor = builder.create(loc, dims, outTy.getElementType()); - auto zero = getConstFloat(builder, 0.0, dataType.getIntOrFloatBitWidth()); + auto zero = getConstFloat(builder, 0.0, dataType.cast()); auto fill = builder.create(loc, zero, redTensor); auto redux = builder.create( loc, redTy, ValueRange{exp.getResult(0)}, ValueRange{fill.getResult(0)}, @@ -631,7 +629,7 @@ int MLIRGenerator::getRand() { } Value MLIRGenerator::getZeroInitTensor(TensorType type) { - auto zero = getConstFloat(builder, 0.0, dataType.getIntOrFloatBitWidth()); + auto zero = getConstFloat(builder, 0.0, dataType.cast()); Value tensor = builder.create(loc, type, ValueRange{}).getResult(); tensor = builder.create(loc, zero, tensor).getResult(0); diff --git a/tools/mlir-gen/MLIRGen.h b/tools/mlir-gen/MLIRGen.h index fe78aa22a..f7cfc2e21 100644 --- a/tools/mlir-gen/MLIRGen.h +++ b/tools/mlir-gen/MLIRGen.h @@ -189,7 +189,7 @@ class MLIRGenerator { /// Creates a specific module. Different configurations need different modules /// so should create new objects to not have to share / cleanup existing MLIR /// modules. - MLIRGenerator(StringRef, unsigned, StringRef, StringRef, unsigned, int, bool, + MLIRGenerator(StringRef, unsigned, StringRef, StringRef, StringRef, int, bool, bool, bool, int); ~MLIRGenerator() { module->destroy(); } diff --git a/tools/mlir-gen/generate.sh b/tools/mlir-gen/generate.sh index 495b46cfc..4197586d2 100755 --- a/tools/mlir-gen/generate.sh +++ b/tools/mlir-gen/generate.sh @@ -126,7 +126,7 @@ debug "Random seed: $SEED" # Defaults # Command line to extract and run -MLIR_GEN_ARGS="--float-width=$FLOAT_SIZE --seed=$SEED --batch=$MB --layers=$LAYER_SIZES --tiles=$TILE_SIZES" +MLIR_GEN_ARGS="--float-type=$FLOAT_SIZE --seed=$SEED --batch=$MB --layers=$LAYER_SIZES --tiles=$TILE_SIZES" ORIG_MLIR="mlir-gen-original.mlir" debug "\nCreating the original MLIR model:" run "$MLIR_GEN $MLIR_GEN_ARGS" $ORIG_MLIR diff --git a/tools/mlir-gen/mlir-gen.cpp b/tools/mlir-gen/mlir-gen.cpp index a7f5c27de..56bf178cb 100644 --- a/tools/mlir-gen/mlir-gen.cpp +++ b/tools/mlir-gen/mlir-gen.cpp @@ -47,11 +47,10 @@ llvm::cl::opt llvm::cl::desc("Comma-separated values of size of each tile (N,K,C)"), llvm::cl::value_desc("32,32,32"), llvm::cl::init("")); -// Float width -llvm::cl::opt floatWidth("float-width", - llvm::cl::desc("Bitsize of float type"), - llvm::cl::value_desc("32|16"), - llvm::cl::init(32)); +// Float type +llvm::cl::opt + floatType("float-type", llvm::cl::desc("Float type and its bitsize"), + llvm::cl::value_desc("f32|f16|bf16"), llvm::cl::init("f32")); // Random seed llvm::cl::opt seed("seed", llvm::cl::desc("Random seed"), @@ -93,7 +92,7 @@ int main(int argc, char **argv) { llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR Generator"); - MLIRGenerator gen(kernel, batch, layers, tiles, floatWidth, seed, enableBias, + MLIRGenerator gen(kernel, batch, layers, tiles, floatType, seed, enableBias, enableRelu, enableSoftmax, vnni); return gen.generate(filename); }