Skip to content

Commit

Permalink
added libxsmm-dnn parallelization strategy to tpp-run
Browse files: browse the repository at this point in the history
  • Loading branch information
alheinecke authored and Kavitha Madhu committed Feb 19, 2024
1 parent 2756870 commit c69ce46
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 32 deletions.
16 changes: 8 additions & 8 deletions benchmarks/config/omp/mlir-bf16.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
Expand All @@ -36,28 +36,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_dp2_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ "(avx2|asimd)" ]
}
}}
Expand Down
16 changes: 8 additions & 8 deletions benchmarks/config/omp/mlir-fp32.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
Expand All @@ -36,28 +36,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-width=32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ "(avx2|asimd)" ]
}
}}
Expand Down
32 changes: 16 additions & 16 deletions benchmarks/config/omp/torch-dynamo.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ ]
}
}},
Expand All @@ -36,28 +36,28 @@
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ ]
}
}},
Expand All @@ -67,28 +67,28 @@
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ ]
}
}},
Expand All @@ -98,28 +98,28 @@
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"extensions": [ ]
},
"bf16_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-bf16-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='-def-parallel'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"extensions": [ ]
}
}}
Expand Down

0 comments on commit c69ce46

Please sign in to comment.