Skip to content

Commit

Permalink
[mlir-gen] Add Float16 type support (#901)
Browse files Browse the repository at this point in the history
Allows IR generation with the f16 data type.

The new type is mostly relevant for GPU testing, as f16 is a common
target for hardware-accelerated operations.

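The required changes are propagated throughout the codebase: f16 and bf16
are both 16 bits wide, so the type can no longer be selected by bit width
alone, and the `--float-width` option is replaced by `--float-type`.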
  • Loading branch information
adam-smnk authored Mar 12, 2024
1 parent 887d4a2 commit b04f662
Show file tree
Hide file tree
Showing 52 changed files with 327 additions and 310 deletions.
28 changes: 14 additions & 14 deletions benchmarks/config/base/base.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,42 +31,42 @@
},
"gemm_fp32_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": []
},
"gemm_bf16_dp2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "avx2" ]
},
"gemm_bf16_dp4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "svebf16" ]
},
"mlp_fp32_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": []
},
"mlp_bf16_dp2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "avx2" ]
},
"mlp_bf16_dp4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "svebf16" ]
Expand All @@ -76,28 +76,28 @@
"gemm_models": {
"fp32_3x1024_const_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_const_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100"],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=args --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100"],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -107,28 +107,28 @@
"mlp_models": {
"fp32_3x1024_const_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_const_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100"],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100"],
"extensions": [ "(avx2|asimd)" ]
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/config/fc/1024x1024x512.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
"fc_1024x1024x512_fp32_mlir": {
"fc_fp32_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "1" },
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_fp32_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -54,14 +54,14 @@
"fc_1024x1024x512_bf16_dp2_mlir": {
"fc_bf16_dp2_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_bf16_dp2_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -71,14 +71,14 @@
"fc_1024x1024x512_bf16_dp4_mlir": {
"fc_bf16_dp4_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(svebf16)" ]
},
"fc_bf16_dp4_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(svebf16)" ]
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/config/fc/1024x2560x1024.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
"fc_1024x2560x1024_fp32_mlir": {
"fc_fp32_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "1" },
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_fp32_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -54,14 +54,14 @@
"fc_1024x2560x1024_bf16_dp2_mlir": {
"fc_bf16_dp2_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_bf16_dp2_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -71,14 +71,14 @@
"fc_1024x2560x1024_bf16_dp4_mlir": {
"fc_bf16_dp4_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(svebf16)" ]
},
"fc_bf16_dp4_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=1024,2560 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(svebf16)" ]
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/config/fc/1024x352x512.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
"fc_1024x352x512_fp32_mlir": {
"fc_fp32_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "1" },
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_fp32_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -54,14 +54,14 @@
"fc_1024x352x512_bf16_dp2_mlir": {
"fc_bf16_dp2_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_bf16_dp2_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -71,14 +71,14 @@
"fc_1024x352x512_bf16_dp4_mlir": {
"fc_bf16_dp4_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(svebf16)" ]
},
"fc_bf16_dp4_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=512,352 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(svebf16)" ]
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/config/fc/1024x512x256.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
"fc_1024x512x256_fp32_mlir": {
"fc_fp32_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "1" },
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_fp32_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=32 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -54,14 +54,14 @@
"fc_1024x512x256_bf16_dp2_mlir": {
"fc_bf16_dp2_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fc_bf16_dp2_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=2 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(avx2|asimd)" ]
Expand All @@ -71,14 +71,14 @@
"fc_1024x512x256_bf16_dp4_mlir": {
"fc_bf16_dp4_single_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(svebf16)" ]
},
"fc_bf16_dp4_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-width=16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=bf16 --vnni=4 --batch=1024 --layers=256,512 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"extensions": [ "(svebf16)" ]
Expand Down
Loading

0 comments on commit b04f662

Please sign in to comment.