-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
1,496 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
flambda-backend/tests/backend/vectorizer/test_arrays.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
add_arrays_unrolled_manually 17 18 19 20 21 22 23 24 25 26 | ||
add_arrays_unrolled_safe 17 18 19 20 21 22 23 24 25 26 | ||
add_arrays_rec_unrolled_attribute 17 18 19 20 21 22 23 24 25 26 | ||
add_arrays_for 17 18 19 20 21 22 23 24 25 26 | ||
add_arrays_rec 17 18 19 20 21 22 23 24 25 26 | ||
initialize_array_const_unrolled_manually 0 0 0 0 0 0 0 0 0 0 | ||
initialize_arrays_const_unrolled_manually 0 0 0 0 0 0 0 0 0 0 | ||
initialize_array_unrolled_manually 17 17 17 17 17 17 17 17 17 17 | ||
initialize_floatarray_unrolled_manually 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 |
141 changes: 141 additions & 0 deletions
141
flambda-backend/tests/backend/vectorizer/test_arrays.ml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
(* [add_arrays_unrolled_manually a b c n] stores [a.(i) + b.(i)] into [c.(i)]
   for the first [n] indices. The loop is unrolled by hand: each iteration
   handles the two consecutive indices [i * 2] and [(i * 2) + 1], and a
   trailing conditional handles the last index when [n] is odd. All accesses
   go through [Array.unsafe_get] / [Array.unsafe_set], so this code performs
   no bounds checks itself. *)
let[@inline never] [@local never] [@specialize never] add_arrays_unrolled_manually | ||
a b c n = | ||
for i = 0 to (n / 2) - 1 do | ||
Array.unsafe_set c (i * 2) | ||
(Array.unsafe_get a (i * 2) + Array.unsafe_get b (i * 2)); | ||
Array.unsafe_set c | ||
((i * 2) + 1) | ||
(Array.unsafe_get a ((i * 2) + 1) + Array.unsafe_get b ((i * 2) + 1)) | ||
done; | ||
(* Odd [n]: the pairwise loop above stops before index [n - 1]. *)
if Int.rem n 2 = 1 | ||
then | ||
Array.unsafe_set c (n - 1) | ||
(Array.unsafe_get a (n - 1) + Array.unsafe_get b (n - 1)) | ||
|
||
(* Currently won't be vectorized. It could be vectorized, but that is not worth | ||
it according to our cost model. It will be vectorized when we add vectors wider | ||
than 128 bits or arrays of elements smaller than 64 bits. *) | ||
(* [initialize_array_const_unrolled_manually arr n] writes the constant [0]
   into [arr] two elements per iteration (indices [!i] and [!i + 1]),
   advancing [i] by 2 while [!i < n].
   NOTE(review): there is no remainder handling, so for an odd [n] the last
   iteration also writes index [n] through [Array.unsafe_set]; callers
   presumably pass an even [n] no larger than the array length. *)
let[@inline never] [@local never] [@specialize never] initialize_array_const_unrolled_manually | ||
arr n = | ||
let i = ref 0 in | ||
while !i < n do | ||
Array.unsafe_set arr !i 0; | ||
Array.unsafe_set arr (!i + 1) 0; | ||
i := !i + 2 | ||
done | ||
|
||
(* Currently, won't be vectorized. If different groups can reuse the new | ||
register that holds the constants, this will be worth vectorizing even with | ||
128-bit vectors. *) | ||
(* [initialize_arrays_const_unrolled_manually a b c n] writes the constant [0]
   into the three arrays [a], [b] and [c] in a single loop, two elements
   (indices [!i] and [!i + 1]) per array per iteration, while [!i < n].
   NOTE(review): as with the single-array variant there is no remainder
   handling, so an odd [n] makes the last iteration write index [n] through
   [Array.unsafe_set]; callers presumably pass an even [n]. *)
let[@inline never] [@local never] [@specialize never] initialize_arrays_const_unrolled_manually | ||
a b c n = | ||
let i = ref 0 in | ||
while !i < n do | ||
Array.unsafe_set a !i 0; | ||
Array.unsafe_set a (!i + 1) 0; | ||
Array.unsafe_set b !i 0; | ||
Array.unsafe_set b (!i + 1) 0; | ||
Array.unsafe_set c !i 0; | ||
Array.unsafe_set c (!i + 1) 0; | ||
i := !i + 2 | ||
done | ||
|
||
(* Currently, won't be vectorized. Shuffling values into a vector is not yet | ||
supported, only vector loads are. Also not worth it unless the shuffle is | ||
outside the loop (loop invariant detection/motion would be needed for it). *) | ||
(* [initialize_array_unrolled_manually arr n v] writes the int [v] into [arr]
   two elements per iteration (indices [!i] and [!i + 1]), advancing [i] by 2
   while [!i < n]. Unlike the [_const_] variants, the stored value is a
   function argument rather than a literal.
   NOTE(review): no remainder handling; an odd [n] makes the last iteration
   write index [n] through [Array.unsafe_set]. *)
let[@inline never] [@local never] [@specialize never] initialize_array_unrolled_manually | ||
arr n (v : int) = | ||
let i = ref 0 in | ||
while !i < n do | ||
Array.unsafe_set arr !i v; | ||
Array.unsafe_set arr (!i + 1) v; | ||
i := !i + 2 | ||
done | ||
|
||
(* same as [initialize_array_unrolled_manually] except needs movddup. *) | ||
(* [initialize_floatarray_unrolled_manually arr n v] writes the float [v] into
   [arr] two elements per iteration (indices [!i] and [!i + 1]), advancing [i]
   by 2 while [!i < n].
   NOTE(review): no remainder handling; an odd [n] makes the last iteration
   write index [n] through [Array.unsafe_set]. *)
let[@inline never] [@local never] [@specialize never] initialize_floatarray_unrolled_manually | ||
arr n (v : float) = | ||
let i = ref 0 in | ||
while !i < n do | ||
Array.unsafe_set arr !i v; | ||
Array.unsafe_set arr (!i + 1) v; | ||
i := !i + 2 | ||
done | ||
|
||
(* cannot vectorize across basic blocks *) | ||
(* [add_arrays_unrolled_safe a b c n] is the bounds-checked counterpart of the
   manually unrolled addition: it uses [Array.get] / [Array.set] instead of
   the unsafe accessors. Note that [n] counts iterations, not elements: each
   iteration handles indices [i * 2] and [(i * 2) + 1], so indices
   [0 .. (2 * n) - 1] are written. *)
let[@inline never] [@local never] [@specialize never] add_arrays_unrolled_safe a | ||
b c n = | ||
for i = 0 to n - 1 do | ||
Array.set c (i * 2) (Array.get a (i * 2) + Array.get b (i * 2)); | ||
Array.set c | ||
((i * 2) + 1) | ||
(Array.get a ((i * 2) + 1) + Array.get b ((i * 2) + 1)) | ||
done | ||
|
||
(* cannot vectorize across basic blocks. unroll attribute is not sufficient to | ||
eliminate the loop condition from the unrolled body (e.g., we would need to | ||
track the fact that the bound is even). *) | ||
(* [add_arrays_rec_unrolled_attribute a b c n] stores [a.(i) + b.(i)] into
   [c.(i)] for indices [0 .. (2 * n) - 1] using a local recursive function
   instead of a loop construct. The recursive call carries [@unrolled 1] and
   the definition carries [@loop never]. Note that [n] is doubled before being
   used as the bound, so callers pass half the element count. *)
let[@inline never] [@local never] [@specialize never] add_arrays_rec_unrolled_attribute | ||
a b c n = | ||
let[@loop never] rec loop i a b c n = | ||
if i < n | ||
then ( | ||
Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i); | ||
(loop [@unrolled 1]) (i + 1) a b c n) | ||
in | ||
loop 0 a b c (2 * n) | ||
|
||
(* cannot vectorize for-loops *) | ||
(* [add_arrays_for a b c n] stores [a.(i) + b.(i)] into [c.(i)] for
   [i = 0 .. n - 1] with a plain, non-unrolled [for] loop and unsafe
   (unchecked) array accesses. *)
let[@inline never] [@local never] [@specialize never] add_arrays_for a b c n = | ||
for i = 0 to n - 1 do | ||
Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i) | ||
done | ||
|
||
(* cannot vectorize loops expressed using recursion *) | ||
(* [add_arrays_rec a b c n] stores [a.(i) + b.(i)] into [c.(i)] for
   [i = 0 .. n - 1], with the iteration expressed as a local recursive
   function [loop] that closes over [a], [b], [c] and [n]. Accesses are
   unchecked ([Array.unsafe_get] / [Array.unsafe_set]). *)
let[@inline never] [@local never] [@specialize never] add_arrays_rec a b c n = | ||
let rec loop i = | ||
if i < n | ||
then ( | ||
Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i); | ||
loop (i + 1)) | ||
in | ||
loop 0 | ||
|
||
(* [print_array ppf a] prints every element of the int array [a] on [ppf]
   using [%d], each one followed by a single space (so non-empty output ends
   with a trailing space). *)
let print_array ppf a =
  Array.iter (fun elt -> Format.fprintf ppf "%d " elt) a
|
||
(* [print_floatarray ppf a] prints every element of the float array [a] on
   [ppf] using [%f], each one followed by a single space (so non-empty output
   ends with a trailing space). *)
let print_floatarray ppf a =
  Array.iter (fun elt -> Format.fprintf ppf "%f " elt) a
|
||
(* Test driver: builds arrays of length [n = 10], runs each function above and
   prints the resulting array after every call. The calls are order-dependent
   because they all mutate the shared output array [c] (and
   [initialize_arrays_const_unrolled_manually] also overwrites [a] and [b]).
   [add_arrays_unrolled_safe] and [add_arrays_rec_unrolled_attribute] take an
   iteration count rather than an element count, hence [n / 2].
   Several arguments go through [Sys.opaque_identity], presumably so the
   optimizer cannot treat them as known constants. *)
let () = | ||
let n = Sys.opaque_identity 10 in | ||
let a = Array.init n (fun i -> i) in | ||
let b = Array.make n 17 in | ||
let c = Array.make n 0 in | ||
let d = Array.make n 0.0 in | ||
add_arrays_unrolled_manually a b c (Sys.opaque_identity n); | ||
Format.printf "add_arrays_unrolled_manually %a\n" print_array c; | ||
add_arrays_unrolled_safe a b c (Sys.opaque_identity (n / 2)); | ||
Format.printf "add_arrays_unrolled_safe %a\n" print_array c; | ||
add_arrays_rec_unrolled_attribute a b c (n / 2); | ||
Format.printf "add_arrays_rec_unrolled_attribute %a\n" print_array c; | ||
add_arrays_for a b c n; | ||
Format.printf "add_arrays_for %a\n" print_array c; | ||
add_arrays_rec a b c n; | ||
Format.printf "add_arrays_rec %a\n" print_array c; | ||
initialize_array_const_unrolled_manually c n; | ||
Format.printf "initialize_array_const_unrolled_manually %a\n" print_array c; | ||
initialize_arrays_const_unrolled_manually a b c n; | ||
Format.printf "initialize_arrays_const_unrolled_manually %a\n" print_array c; | ||
initialize_array_unrolled_manually c n (Sys.opaque_identity 17); | ||
Format.printf "initialize_array_unrolled_manually %a\n" print_array c; | ||
initialize_floatarray_unrolled_manually d n (Sys.opaque_identity 7.7); | ||
Format.printf "initialize_floatarray_unrolled_manually %a\n" print_floatarray | ||
d; | ||
() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
(* blank, make sure all the functions are called from top-level *) |
1 change: 1 addition & 0 deletions
1
flambda-backend/tests/backend/vectorizer/test_arrays_vectorized.cmx.dump.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test_arrays_vectorized.add_arrays_unrolled_manually) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
add_mutable_record { d0 = 88.000000 ; d1 = 110.000000 } | ||
copy_mutable_record { d0 = 88.000000 ; d1 = 110.000000 } | ||
add_mutable_record_fresh { d0 = 88.000000 ; d1 = 110.000000 } | ||
copy_mutable_record_fresh { d0 = 88.000000 ; d1 = 110.000000 } | ||
add_mutable_record_t4 { d0 = 88.000000 ; d1 = 110.000000; d2 = 88.000000 ; d3 = 110.000000 } | ||
copy_mutable_record_t4 { d0 = 8.000000 ; d1 = 96.000000; d2 = 80.000000 ; d3 = 14.000000 } | ||
dup_mutable_record_t4 { d0 = 8.000000 ; d1 = 96.000000; d2 = 8.000000 ; d3 = 96.000000 } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
[@@@ocaml.warnerror "+a-40-41-42"] | ||
|
||
(* Record with two mutable float fields, [d0] and [d1]. *)
type t1 = | ||
{ mutable d0 : float; | ||
mutable d1 : float | ||
} | ||
|
||
(* [add_mutable_record a b c] overwrites [c.d0] and [c.d1] with the field-wise
   sums of [a] and [b] (computed with [Float.add]) and returns [c] itself. *)
let[@inline never] [@local never] [@specialize never] add_mutable_record | ||
(a : t1) (b : t1) (c : t1) : t1 = | ||
c.d0 <- Float.add a.d0 b.d0; | ||
c.d1 <- Float.add a.d1 b.d1; | ||
c | ||
|
||
(* [copy_mutable_record a b] copies both fields of [a] into the existing
   record [b] and returns [b] itself. *)
let[@inline never] [@local never] [@specialize never] copy_mutable_record | ||
(a : t1) (b : t1) : t1 = | ||
b.d0 <- a.d0; | ||
b.d1 <- a.d1; | ||
b | ||
|
||
(* [add_mutable_record_fresh a b] returns a newly constructed [t1] holding the
   field-wise sums of [a] and [b]; neither argument is modified. *)
let[@inline never] [@local never] [@specialize never] add_mutable_record_fresh | ||
(a : t1) (b : t1) : t1 = | ||
{ d0 = Float.add a.d0 b.d0; d1 = Float.add a.d1 b.d1 } | ||
|
||
(* [copy_mutable_record_fresh a] returns a newly constructed [t1] with the same
   field values as [a]. *)
let[@inline never] [@local never] [@specialize never] copy_mutable_record_fresh | ||
(a : t1) : t1 = | ||
{ d0 = a.d0; d1 = a.d1 } | ||
|
||
(* Record with four mutable float fields. It reuses the label names [d0] and
   [d1] of [t1], which is why the functions in this file annotate their
   parameters and results with explicit record types. *)
type t4 = | ||
{ mutable d0 : float; | ||
mutable d1 : float; | ||
mutable d2 : float; | ||
mutable d3 : float | ||
} | ||
|
||
(* [add_mutable_record_t4 a b c] overwrites all four fields of [c]: [d0]/[d1]
   receive the field-wise sums of [a] and [b], and [d2]/[d3] receive the same
   two sums again. Returns [c] itself. *)
let[@inline never] [@local never] [@specialize never] add_mutable_record_t4 | ||
(a : t1) (b : t1) (c : t4) : t4 = | ||
c.d0 <- Float.add a.d0 b.d0; | ||
c.d1 <- Float.add a.d1 b.d1; | ||
c.d2 <- Float.add a.d0 b.d0; | ||
c.d3 <- Float.add a.d1 b.d1; | ||
c | ||
|
||
(* [copy_mutable_record_t4 a b] returns a newly constructed [t4] whose first two
   fields come from [a] and whose last two fields come from [b]. *)
let[@inline never] [@local never] [@specialize never] copy_mutable_record_t4 | ||
(a : t1) (b : t1) : t4 = | ||
{ d0 = a.d0; d1 = a.d1; d2 = b.d0; d3 = b.d1 } | ||
|
||
(* [dup_mutable_record_t4 a] returns a newly constructed [t4] holding the two
   fields of [a] twice: [d0]/[d1] and again in [d2]/[d3]. *)
let[@inline never] [@local never] [@specialize never] dup_mutable_record_t4 | ||
(a : t1) : t4 = | ||
{ d0 = a.d0; d1 = a.d1; d2 = a.d0; d3 = a.d1 } | ||
|
||
(* [print_t1 ppf r] prints both fields of the [t1] record [r] on [ppf] with
   [%f], in record-literal form. *)
let print_t1 ppf ({ d0; d1 } : t1) =
  Format.fprintf ppf "{ d0 = %f ; d1 = %f }" d0 d1
|
||
(* [print_t4 ppf r] prints all four fields of the [t4] record [r] on [ppf] with
   [%f], in record-literal form. *)
let print_t4 ppf ({ d0; d1; d2; d3 } : t4) =
  Format.fprintf ppf "{ d0 = %f ; d1 = %f; d2 = %f ; d3 = %f }" d0 d1 d2 d3
|
||
(* Test driver: builds a few records, calls each function above once and prints
   the returned record. Order matters: [add_mutable_record a b c] mutates [c],
   and the later [copy_mutable_record c res] and [copy_mutable_record_fresh c]
   calls read that updated [c]. The two-field literals are built as [t1] values
   even though [t4] shares the labels [d0] and [d1]. *)
let () = | ||
let a = { d0 = 8.; d1 = 96. } in | ||
let b = { d0 = 80.; d1 = 14. } in | ||
let c = { d0 = 10.; d1 = -10. } in | ||
let t4 = { d0 = 10.; d1 = -10.; d2 = 199.; d3 = 18. } in | ||
let res = { d0 = 0.; d1 = -0. } in | ||
Format.printf "add_mutable_record %a\n" print_t1 (add_mutable_record a b c); | ||
Format.printf "copy_mutable_record %a\n" print_t1 (copy_mutable_record c res); | ||
Format.printf "add_mutable_record_fresh %a\n" print_t1 | ||
(add_mutable_record_fresh a b); | ||
Format.printf "copy_mutable_record_fresh %a\n" print_t1 | ||
(copy_mutable_record_fresh c); | ||
Format.printf "add_mutable_record_t4 %a\n" print_t4 | ||
(add_mutable_record_t4 a b t4); | ||
Format.printf "copy_mutable_record_t4 %a\n" print_t4 | ||
(copy_mutable_record_t4 a b); | ||
Format.printf "dup_mutable_record_t4 %a\n" print_t4 (dup_mutable_record_t4 a); | ||
() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
(* blank, make sure all the functions are called from top-level *) |
5 changes: 5 additions & 0 deletions
5
flambda-backend/tests/backend/vectorizer/test_float32_unboxed.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
add_unboxed_pairs_mutable_record { d0 = 88. ; d1 = 110.; d2 = 0. ; d3 = -1. } | ||
copy_unboxed_pairs_mutable_record { d0 = 88. ; d1 = 110.; d2 = 0. ; d3 = -1. } | ||
copy_bytes 10. 10. 10. 10. | ||
copy_bytes_pos 10. 10. 10. 10. | ||
copy_bytes_pos_v2 10. 10. 10. 10. |
Oops, something went wrong.