Add tests

ocaml-flambda · Jan 9, 2025 · 2c72dca · 2c72dca
1 parent c048920
commit 2c72dca
Show file tree

Hide file tree

Showing 30 changed files with 1,496 additions and 0 deletions.
diff --git a/flambda-backend/tests/backend/vectorizer/dune.inc b/flambda-backend/tests/backend/vectorizer/dune.inc
diff --git a/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml b/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml
@@ -170,6 +170,13 @@ let print_test ?(filter_exit_code = 0) name =
 
 let () =
   print_test "test1";
+  print_test "test_arrays";
+  print_test "test_int64_unboxed";
+  print_test "test_float_unboxed";
+  print_test "test_int64";
+  print_test "test_float";
+  print_test "test_float32_unboxed";
+  print_test "test_int32_unboxed";
   (* can't vectorize *)
   print_test ~filter_exit_code:1 "test_register_compatible";
   ()
diff --git a/flambda-backend/tests/backend/vectorizer/test_arrays.expected b/flambda-backend/tests/backend/vectorizer/test_arrays.expected
@@ -0,0 +1,9 @@
+add_arrays_unrolled_manually 17 18 19 20 21 22 23 24 25 26 
+add_arrays_unrolled_safe 17 18 19 20 21 22 23 24 25 26 
+add_arrays_rec_unrolled_attribute 17 18 19 20 21 22 23 24 25 26 
+add_arrays_for 17 18 19 20 21 22 23 24 25 26 
+add_arrays_rec 17 18 19 20 21 22 23 24 25 26 
+initialize_array_const_unrolled_manually 0 0 0 0 0 0 0 0 0 0 
+initialize_arrays_const_unrolled_manually 0 0 0 0 0 0 0 0 0 0 
+initialize_array_unrolled_manually 17 17 17 17 17 17 17 17 17 17 
+initialize_floatarray_unrolled_manually 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 
diff --git a/flambda-backend/tests/backend/vectorizer/test_arrays.ml b/flambda-backend/tests/backend/vectorizer/test_arrays.ml
@@ -0,0 +1,141 @@
+let[@inline never] [@local never] [@specialize never] add_arrays_unrolled_manually
+    a b c n =
+  for i = 0 to (n / 2) - 1 do
+    Array.unsafe_set c (i * 2)
+      (Array.unsafe_get a (i * 2) + Array.unsafe_get b (i * 2));
+    Array.unsafe_set c
+      ((i * 2) + 1)
+      (Array.unsafe_get a ((i * 2) + 1) + Array.unsafe_get b ((i * 2) + 1))
+  done;
+  if Int.rem n 2 = 1
+  then
+    Array.unsafe_set c (n - 1)
+      (Array.unsafe_get a (n - 1) + Array.unsafe_get b (n - 1))
+
+(* Currently won't be vectorized. It can be vectorized, but doing so is not
+   worth it according to our cost model. It will be vectorized when we add
+   vectors wider than 128 bits or arrays of elements smaller than 64 bits. *)
+let[@inline never] [@local never] [@specialize never] initialize_array_const_unrolled_manually
+    arr n =
+  let i = ref 0 in
+  while !i < n do
+    Array.unsafe_set arr !i 0;
+    Array.unsafe_set arr (!i + 1) 0;
+    i := !i + 2
+  done
+
+(* Currently, won't be vectorized. If different groups can reuse the new
+   register that holds the constants, this will be worth vectorizing even with
+   128-bit vectors. *)
+let[@inline never] [@local never] [@specialize never] initialize_arrays_const_unrolled_manually
+    a b c n =
+  let i = ref 0 in
+  while !i < n do
+    Array.unsafe_set a !i 0;
+    Array.unsafe_set a (!i + 1) 0;
+    Array.unsafe_set b !i 0;
+    Array.unsafe_set b (!i + 1) 0;
+    Array.unsafe_set c !i 0;
+    Array.unsafe_set c (!i + 1) 0;
+    i := !i + 2
+  done
+
+(* Currently, won't be vectorized. Shuffling values into a vector is not yet
+   supported, only vector loads are. Also not worth it unless the shuffle is
+   outside the loop (loop invariant detection/motion would be needed for it). *)
+let[@inline never] [@local never] [@specialize never] initialize_array_unrolled_manually
+    arr n (v : int) =
+  let i = ref 0 in
+  while !i < n do
+    Array.unsafe_set arr !i v;
+    Array.unsafe_set arr (!i + 1) v;
+    i := !i + 2
+  done
+
+(* same as [initialize_array_unrolled_manually] except needs movddup. *)
+let[@inline never] [@local never] [@specialize never] initialize_floatarray_unrolled_manually
+    arr n (v : float) =
+  let i = ref 0 in
+  while !i < n do
+    Array.unsafe_set arr !i v;
+    Array.unsafe_set arr (!i + 1) v;
+    i := !i + 2
+  done
+
+(* cannot vectorize across basic blocks *)
+let[@inline never] [@local never] [@specialize never] add_arrays_unrolled_safe a
+    b c n =
+  for i = 0 to n - 1 do
+    Array.set c (i * 2) (Array.get a (i * 2) + Array.get b (i * 2));
+    Array.set c
+      ((i * 2) + 1)
+      (Array.get a ((i * 2) + 1) + Array.get b ((i * 2) + 1))
+  done
+
+(* cannot vectorize across basic blocks. unroll attribute is not sufficient to
+   eliminate the loop condition from the unrolled body (e.g., we would need to
+   track the fact that the bound is even). *)
+let[@inline never] [@local never] [@specialize never] add_arrays_rec_unrolled_attribute
+    a b c n =
+  let[@loop never] rec loop i a b c n =
+    if i < n
+    then (
+      Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i);
+      (loop [@unrolled 1]) (i + 1) a b c n)
+  in
+  loop 0 a b c (2 * n)
+
+(* cannot vectorize for-loops *)
+let[@inline never] [@local never] [@specialize never] add_arrays_for a b c n =
+  for i = 0 to n - 1 do
+    Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i)
+  done
+
+(* cannot vectorize loops expressed using recursion *)
+let[@inline never] [@local never] [@specialize never] add_arrays_rec a b c n =
+  let rec loop i =
+    if i < n
+    then (
+      Array.unsafe_set c i (Array.unsafe_get a i + Array.unsafe_get b i);
+      loop (i + 1))
+  in
+  loop 0
+
+let print_array ppf a =
+  let count = Array.length a in
+  for i = 0 to count - 1 do
+    Format.fprintf ppf "%d " a.(i)
+  done
+
+let print_floatarray ppf a =
+  let count = Array.length a in
+  for i = 0 to count - 1 do
+    Format.fprintf ppf "%f " a.(i)
+  done
+
+let () =
+  let n = Sys.opaque_identity 10 in
+  let a = Array.init n (fun i -> i) in
+  let b = Array.make n 17 in
+  let c = Array.make n 0 in
+  let d = Array.make n 0.0 in
+  add_arrays_unrolled_manually a b c (Sys.opaque_identity n);
+  Format.printf "add_arrays_unrolled_manually %a\n" print_array c;
+  add_arrays_unrolled_safe a b c (Sys.opaque_identity (n / 2));
+  Format.printf "add_arrays_unrolled_safe %a\n" print_array c;
+  add_arrays_rec_unrolled_attribute a b c (n / 2);
+  Format.printf "add_arrays_rec_unrolled_attribute %a\n" print_array c;
+  add_arrays_for a b c n;
+  Format.printf "add_arrays_for %a\n" print_array c;
+  add_arrays_rec a b c n;
+  Format.printf "add_arrays_rec %a\n" print_array c;
+  initialize_array_const_unrolled_manually c n;
+  Format.printf "initialize_array_const_unrolled_manually %a\n" print_array c;
+  initialize_arrays_const_unrolled_manually a b c n;
+  Format.printf "initialize_arrays_const_unrolled_manually %a\n" print_array c;
+  initialize_array_unrolled_manually c n (Sys.opaque_identity 17);
+  Format.printf "initialize_array_unrolled_manually %a\n" print_array c;
+  initialize_floatarray_unrolled_manually d n (Sys.opaque_identity 7.7);
+  Format.printf "initialize_floatarray_unrolled_manually %a\n" print_floatarray
+    d;
+  ()
diff --git a/flambda-backend/tests/backend/vectorizer/test_arrays.mli b/flambda-backend/tests/backend/vectorizer/test_arrays.mli
@@ -0,0 +1 @@
+(* blank, make sure all the functions are called from top-level *)
diff --git a/flambda-backend/tests/backend/vectorizer/test_arrays_vectorized.cmx.dump.expected b/flambda-backend/tests/backend/vectorizer/test_arrays_vectorized.cmx.dump.expected
@@ -0,0 +1 @@
+**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test_arrays_vectorized.add_arrays_unrolled_manually)
diff --git a/flambda-backend/tests/backend/vectorizer/test_float.expected b/flambda-backend/tests/backend/vectorizer/test_float.expected
@@ -0,0 +1,7 @@
+add_mutable_record { d0 = 88.000000 ; d1 = 110.000000 }
+copy_mutable_record { d0 = 88.000000 ; d1 = 110.000000 }
+add_mutable_record_fresh { d0 = 88.000000 ; d1 = 110.000000 }
+copy_mutable_record_fresh { d0 = 88.000000 ; d1 = 110.000000 }
+add_mutable_record_t4 { d0 = 88.000000 ; d1 = 110.000000; d2 = 88.000000 ; d3 = 110.000000 }
+copy_mutable_record_t4 { d0 = 8.000000 ; d1 = 96.000000; d2 = 80.000000 ; d3 = 14.000000 }
+dup_mutable_record_t4 { d0 = 8.000000 ; d1 = 96.000000; d2 = 8.000000 ; d3 = 96.000000 }
diff --git a/flambda-backend/tests/backend/vectorizer/test_float.ml b/flambda-backend/tests/backend/vectorizer/test_float.ml
@@ -0,0 +1,75 @@
+[@@@ocaml.warnerror "+a-40-41-42"]
+
+type t1 =
+  { mutable d0 : float;
+    mutable d1 : float
+  }
+
+let[@inline never] [@local never] [@specialize never] add_mutable_record
+    (a : t1) (b : t1) (c : t1) : t1 =
+  c.d0 <- Float.add a.d0 b.d0;
+  c.d1 <- Float.add a.d1 b.d1;
+  c
+
+let[@inline never] [@local never] [@specialize never] copy_mutable_record
+    (a : t1) (b : t1) : t1 =
+  b.d0 <- a.d0;
+  b.d1 <- a.d1;
+  b
+
+let[@inline never] [@local never] [@specialize never] add_mutable_record_fresh
+    (a : t1) (b : t1) : t1 =
+  { d0 = Float.add a.d0 b.d0; d1 = Float.add a.d1 b.d1 }
+
+let[@inline never] [@local never] [@specialize never] copy_mutable_record_fresh
+    (a : t1) : t1 =
+  { d0 = a.d0; d1 = a.d1 }
+
+type t4 =
+  { mutable d0 : float;
+    mutable d1 : float;
+    mutable d2 : float;
+    mutable d3 : float
+  }
+
+let[@inline never] [@local never] [@specialize never] add_mutable_record_t4
+    (a : t1) (b : t1) (c : t4) : t4 =
+  c.d0 <- Float.add a.d0 b.d0;
+  c.d1 <- Float.add a.d1 b.d1;
+  c.d2 <- Float.add a.d0 b.d0;
+  c.d3 <- Float.add a.d1 b.d1;
+  c
+
+let[@inline never] [@local never] [@specialize never] copy_mutable_record_t4
+    (a : t1) (b : t1) : t4 =
+  { d0 = a.d0; d1 = a.d1; d2 = b.d0; d3 = b.d1 }
+
+let[@inline never] [@local never] [@specialize never] dup_mutable_record_t4
+    (a : t1) : t4 =
+  { d0 = a.d0; d1 = a.d1; d2 = a.d0; d3 = a.d1 }
+
+let print_t1 ppf (t1 : t1) =
+  Format.fprintf ppf "{ d0 = %f ; d1 = %f }" t1.d0 t1.d1
+
+let print_t4 ppf (t4 : t4) =
+  Format.fprintf ppf "{ d0 = %f ; d1 = %f; d2 = %f ; d3 = %f }" t4.d0 t4.d1
+    t4.d2 t4.d3
+
+let () =
+  let a = { d0 = 8.; d1 = 96. } in
+  let b = { d0 = 80.; d1 = 14. } in
+  let c = { d0 = 10.; d1 = -10. } in
+  let t4 = { d0 = 10.; d1 = -10.; d2 = 199.; d3 = 18. } in
+  let res = { d0 = 0.; d1 = -0. } in
+  Format.printf "add_mutable_record %a\n" print_t1 (add_mutable_record a b c);
+  Format.printf "copy_mutable_record %a\n" print_t1 (copy_mutable_record c res);
+  Format.printf "add_mutable_record_fresh %a\n" print_t1
+    (add_mutable_record_fresh a b);
+  Format.printf "copy_mutable_record_fresh %a\n" print_t1
+    (copy_mutable_record_fresh c);
+  Format.printf "add_mutable_record_t4 %a\n" print_t4
+    (add_mutable_record_t4 a b t4);
+  Format.printf "copy_mutable_record_t4 %a\n" print_t4
+    (copy_mutable_record_t4 a b);
+  Format.printf "dup_mutable_record_t4 %a\n" print_t4 (dup_mutable_record_t4 a);
+  ()
diff --git a/flambda-backend/tests/backend/vectorizer/test_float.mli b/flambda-backend/tests/backend/vectorizer/test_float.mli
@@ -0,0 +1 @@
+(* blank, make sure all the functions are called from top-level *)
diff --git a/flambda-backend/tests/backend/vectorizer/test_float32_unboxed.expected b/flambda-backend/tests/backend/vectorizer/test_float32_unboxed.expected
@@ -0,0 +1,5 @@
+add_unboxed_pairs_mutable_record { d0 = 88. ; d1 = 110.; d2 = 0. ; d3 = -1. }
+copy_unboxed_pairs_mutable_record { d0 = 88. ; d1 = 110.; d2 = 0. ; d3 = -1. }
+copy_bytes 10. 10. 10. 10. 
+copy_bytes_pos 10. 10. 10. 10. 
+copy_bytes_pos_v2 10. 10. 10. 10.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		(* blank, make sure all the functions are called from top-level *)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test_arrays_vectorized.add_arrays_unrolled_manually)