Skip to content

Commit

Permalink
Merge branch 'global-name-is-module-id' into instance-syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
lukemaurer committed Oct 19, 2023
2 parents c7f2a29 + 156fd09 commit 8b42e26
Show file tree
Hide file tree
Showing 438 changed files with 43,746 additions and 16,731 deletions.
3 changes: 0 additions & 3 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

autoconf-aux @mshinwell @xclerc
configure.ac @mshinwell @xclerc
**/dune @mshinwell @xclerc
**/dune-project @mshinwell @xclerc
Makefile.in @mshinwell @xclerc

flambda_backend.opam @mshinwell @lthls

Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ jobs:
with:
path: 'flambda_backend'

- name: Install AFL (for Linux workers)
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get install afl++

- name: Install AFL (for macOS workers)
if: matrix.os == 'macos-latest'
run: HOMEBREW_NO_INSTALL_CLEANUP=TRUE brew install afl-fuzz

- name: Cache OCaml 4.14 and dune
uses: actions/cache@v2
id: cache
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/jane_syntax.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: jane-syntax-upstream-build

on: [push, pull_request]

jobs:
build:
runs-on: ${{ matrix.os }}

strategy:
matrix:
os: [ubuntu-latest]
ocaml-compiler:
- "4.14.0"

steps:
- name: Checkout the Flambda backend repo
uses: actions/checkout@master
with:
path: 'flambda_backend'

- name: Setup OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v2
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}

- name: Try building Jane_syntax and its dependencies with upstream OCaml
working-directory: flambda_backend
run: opam exec -- ocaml/tools/build_jane_syntax_with_active_opam_switch.sh
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,4 @@ ocamlopt:

.ocamldebug: install
find _build/main -name '*.cmo' -type f -printf 'directory %h\n' | sort -u > .ocamldebug
echo "source ocaml/tools/debug_printers" >> .ocamldebug
echo "source _build/main/$(ocamldir)/tools/debug_printers" >> .ocamldebug
4 changes: 4 additions & 0 deletions backend/.ocamlformat-enable
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ peephole/**/*.ml
peephole/**/*.mli
regalloc/**/*.ml
regalloc/**/*.mli
amd64/simd.ml
arm64/simd.ml
amd64/simd_selection.ml
arm64/simd_selection.ml
4 changes: 2 additions & 2 deletions backend/CSEgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ method class_of_operation op =
| Iintop_atomic _ -> Op_store true
| Icompf _
| Icsel _
| Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue -> Op_pure
| Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Iscalarcast _
| Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue | Ivectorcast _ -> Op_pure
| Ispecific _ -> Op_other
| Iname_for_debugger _ -> Op_other
| Iprobe_is_enabled _ -> Op_other
Expand Down
9 changes: 6 additions & 3 deletions backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,18 @@ method! class_of_operation op =
| Ibswap _ | Isqrtf -> super#class_of_operation op
| Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence -> Op_other
| Ifloat_iround | Ifloat_min | Ifloat_max | Ifloat_round _
| Icrc32q -> Op_pure
| Ifloat_iround | Ifloat_min | Ifloat_max | Ifloat_round _ -> Op_pure
| Isimd op ->
begin match Simd.class_of_operation op with
| Pure -> Op_pure
end
| Ipause
| Iprefetch _ -> Op_other
end
| Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Icompf _
| Icsel _
| Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue
| Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue | Ivectorcast _ | Iscalarcast _
| Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _
| Istackoffset _ | Iload _ | Istore _ | Ialloc _
Expand Down
53 changes: 39 additions & 14 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ let prefetchwt1_support = ref false
(* Emit elf notes with trap handling information. *)
let trap_notes = ref true

(* Baseline x86_64 requires SSE and SSE2. The others are optional.
   Each flag below defaults to [true]; the matching [-f<ext>] and
   [-fno-<ext>] entries in [command_line_options] set and clear it. *)
let sse3_support = ref true
let ssse3_support = ref true
let sse41_support = ref true
let sse42_support = ref true

(* Enable SIMD register allocation features.
   Off by default; set by [-fsimd-regalloc] and cleared by
   [-fno-simd-regalloc]. The option help text states that it is implied
   by [-extension SIMD]. *)
let simd_regalloc = ref false

(* Machine-specific command-line options *)

let command_line_options =
Expand All @@ -42,10 +51,6 @@ let command_line_options =
" Use POPCNT instruction (not available prior to Nehalem) (default)";
"-fno-popcnt", Arg.Clear popcnt_support,
" Do not use POPCNT instruction";
"-fcrc32", Arg.Set crc32_support,
" Use CRC32 instructions (requires SSE4.2 support) (default)";
"-fno-crc32", Arg.Clear crc32_support,
" Do not emit CRC32 instructions";
"-fprefetchw", Arg.Set prefetchw_support,
" Use PREFETCHW instructions (not available on Haswell and earlier) \
(default)";
Expand All @@ -58,7 +63,27 @@ let command_line_options =
"-ftrap-notes", Arg.Set trap_notes,
" Emit .note.ocaml_eh section with trap handling information (default)";
"-fno-trap-notes", Arg.Clear trap_notes,
" Do not emit .note.ocaml_eh section with trap handling information"
" Do not emit .note.ocaml_eh section with trap handling information";
"-fsse3", Arg.Set sse3_support,
" Enable SSE3 intrinsics (default)";
"-fno-sse3", Arg.Clear sse3_support,
" Disable SSE3 intrinsics";
"-fssse3", Arg.Set ssse3_support,
" Enable SSSE3 intrinsics (default)";
"-fno-ssse3", Arg.Clear ssse3_support,
" Disable SSSE3 intrinsics";
"-fsse41", Arg.Set sse41_support,
" Enable SSE4.1 intrinsics (default)";
"-fno-sse41", Arg.Clear sse41_support,
" Disable SSE4.1 intrinsics";
"-fsse42", Arg.Set sse42_support,
" Enable SSE4.2 intrinsics (default)";
"-fno-sse42", Arg.Clear sse42_support,
" Disable SSE4.2 intrinsics";
"-fsimd-regalloc", Arg.Set simd_regalloc,
" Enable SIMD register allocation (implied by -extension SIMD)";
"-fno-simd-regalloc", Arg.Clear simd_regalloc,
" Disable SIMD register allocation (overridden by -extension SIMD) (default)";
]

(* Specific operations for the AMD64 processor *)
Expand Down Expand Up @@ -111,8 +136,8 @@ type specific_operation =
| Ilfence (* load fence *)
| Isfence (* store fence *)
| Imfence (* memory fence *)
| Icrc32q (* compute crc *)
| Ipause (* hint for spin-wait loops *)
| Isimd of Simd.operation (* vectorized operations *)
| Iprefetch of (* memory prefetching hint *)
{ is_write: bool;
locality: prefetch_temporal_locality_hint;
Expand Down Expand Up @@ -241,8 +266,8 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "mfence"
| Irdpmc ->
fprintf ppf "rdpmc %a" printreg arg.(0)
| Icrc32q ->
fprintf ppf "crc32 %a %a" printreg arg.(0) printreg arg.(1)
| Isimd simd ->
Simd.print_operation printreg simd ppf arg
| Ipause ->
fprintf ppf "pause"
| Iprefetch { is_write; locality; } ->
Expand All @@ -262,19 +287,19 @@ let operation_is_pure = function
| Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32 -> true
| Ifloatarithmem _ | Ifloatsqrtf _ -> true
| Ifloat_iround | Ifloat_round _ | Ifloat_min | Ifloat_max -> true
| Icrc32q -> true
| Irdtsc | Irdpmc | Ipause
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Iprefetch _ -> false
| Isimd op -> Simd.is_pure op

(* Specific operations that can raise *)

let operation_can_raise = function
| Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32
| Ifloatarithmem _ | Ifloatsqrtf _
| Ifloat_iround | Ifloat_round _ | Ifloat_min | Ifloat_max
| Icrc32q | Irdtsc | Irdpmc | Ipause
| Irdtsc | Irdpmc | Ipause | Isimd _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Iprefetch _ -> false
Expand All @@ -283,7 +308,7 @@ let operation_allocates = function
| Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32
| Ifloatarithmem _ | Ifloatsqrtf _
| Ifloat_iround | Ifloat_round _ | Ifloat_min | Ifloat_max
| Icrc32q | Irdtsc | Irdpmc | Ipause
| Irdtsc | Irdpmc | Ipause | Isimd _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Iprefetch _ -> false
Expand Down Expand Up @@ -376,8 +401,6 @@ let equal_specific_operation left right =
true
| Imfence, Imfence ->
true
| Icrc32q, Icrc32q ->
true
| Ifloat_iround, Ifloat_iround -> true
| Ifloat_round x, Ifloat_round y -> equal_rounding_mode x y
| Ifloat_min, Ifloat_min -> true
Expand All @@ -388,8 +411,10 @@ let equal_specific_operation left right =
Bool.equal left_is_write right_is_write
&& equal_prefetch_temporal_locality_hint left_locality right_locality
&& equal_addressing_mode left_addr right_addr
| Isimd l, Isimd r ->
Simd.equal_operation l r
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _
| Isqrtf | Ifloatsqrtf _ | Isextend32 | Izextend32 | Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence | Ifloat_iround | Ifloat_round _ |
Ifloat_min | Ifloat_max | Ipause | Icrc32q | Iprefetch _), _ ->
Ifloat_min | Ifloat_max | Ipause | Isimd _ | Iprefetch _), _ ->
false
89 changes: 77 additions & 12 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ let _label s = D.label ~typ:QWORD s

(* Override proc.ml *)

let simd_regalloc_disabled () = not (Language_extension.is_enabled SIMD)
let simd_frontend_disabled () = not (Language_extension.is_enabled SIMD)

let int_reg_name =
[| RAX; RBX; RDI; RSI; RDX; RCX; R8; R9;
Expand All @@ -54,8 +54,8 @@ let register_name typ r =
| Int | Val | Addr -> Reg64 (int_reg_name.(r))
| Float -> Regf (float_reg_name.(r - 100))
| Vec128 ->
if simd_regalloc_disabled () then
Misc.fatal_error "SIMD register allocation is not enabled.";
if simd_frontend_disabled ()
then Misc.fatal_error "SIMD types are not enabled, but got a Vec128 register.";
Regf (float_reg_name.(r - 100))

(* CFI directives *)
Expand Down Expand Up @@ -95,7 +95,8 @@ let frame_required = ref false

let frame_size () = (* includes return address *)
if !frame_required then begin
if simd_regalloc_disabled () then assert (num_stack_slots.(2) = 0);
if simd_frontend_disabled () && (num_stack_slots.(2) > 0)
then Misc.fatal_error "SIMD types are not enabled, but got a Vec128 stack slot.";
let sz =
(!stack_offset
+ 8
Expand All @@ -111,7 +112,8 @@ let slot_offset loc cl =
match loc with
| Incoming n -> frame_size() + n
| Local n ->
if simd_regalloc_disabled () then assert (num_stack_slots.(2) = 0 && cl < 2);
if simd_frontend_disabled () && (num_stack_slots.(2) > 0 || cl >= 2)
then Misc.fatal_error "SIMD types are not enabled, but got a Vec128 stack slot.";
(!stack_offset +
(* Preserves original ordering (int -> float) *)
match cl with
Expand Down Expand Up @@ -696,8 +698,8 @@ let add_vec128_constant bits =
lbl

let emit_vec128_constant {high; low} lbl =
_label (emit_label lbl);
(* SIMD vectors respect little-endian byte order *)
_label (emit_label lbl);
D.qword (Const low);
D.qword (Const high)

Expand Down Expand Up @@ -906,6 +908,38 @@ let emit_atomic instr op (size : Cmm.atomic_bitwidth) addr =
I.set E res8;
I.movzx res8 res

(* [emit_simd_instr op i] emits the machine instruction for the SIMD
   operation [op] carried by the linear instruction [i].

   It works in two steps:
   1. Assert the operand constraints reported by
      [Simd_selection.register_behavior op].
   2. Dispatch on [op] and call the corresponding [I.*] emitter.

   NOTE(review): the constructor names suggest "R" = register and
   "RM" = register-or-memory, with "_to_fst" meaning the result is written
   over the first argument; confirm against [Simd_selection]. *)
let emit_simd_instr op i =
(* Step 1: sanity-check the locations chosen for arguments and result. *)
(match Simd_selection.register_behavior op with
| RM_to_R ->
(* Only the result is required to be a register. *)
assert (Reg.is_reg i.res.(0))
| R_to_R ->
(* Both the single argument and the result must be registers. *)
assert (Reg.is_reg i.arg.(0) && Reg.is_reg i.res.(0))
| R_RM_to_fst ->
(* The result must coincide with the first argument, which must be a
   register; no constraint is asserted on the second argument. *)
assert (arg i 0 = res i 0);
assert (Reg.is_reg i.arg.(0))
| R_R_to_fst ->
(* The result must coincide with the first argument, and both arguments
   must be registers. *)
assert (arg i 0 = res i 0);
assert (Reg.is_reg i.arg.(0) && Reg.is_reg i.arg.(1)));
(* Step 2: emit the instruction.
   Two-input operations pass [arg i 1] together with [res i 0]; the first
   input is not passed separately because the assertions above require it
   to equal the result. Single-input operations pass [arg i 0] and
   [res i 0]. [Cmp_f32] and [Shuffle_32] additionally forward their
   payload [n] as the first emitter argument. *)
match (op : Simd.operation) with
| SSE (Cmp_f32 n) -> I.cmpps n (arg i 1) (res i 0)
| SSE Add_f32 -> I.addps (arg i 1) (res i 0)
| SSE Sub_f32 -> I.subps (arg i 1) (res i 0)
| SSE Mul_f32 -> I.mulps (arg i 1) (res i 0)
| SSE Div_f32 -> I.divps (arg i 1) (res i 0)
| SSE Max_f32 -> I.maxps (arg i 1) (res i 0)
| SSE Min_f32 -> I.minps (arg i 1) (res i 0)
| SSE Rcp_f32 -> I.rcpps (arg i 0) (res i 0)
| SSE Sqrt_f32 -> I.sqrtps (arg i 0) (res i 0)
| SSE Rsqrt_f32 -> I.rsqrtps (arg i 0) (res i 0)
| SSE High_64_to_low_64 -> I.movhlps (arg i 1) (res i 0)
| SSE Low_64_to_high_64 -> I.movlhps (arg i 1) (res i 0)
| SSE Interleave_high_32 -> I.unpckhps (arg i 1) (res i 0)
| SSE Interleave_low_32 -> I.unpcklps (arg i 1) (res i 0)
| SSE Movemask_32 -> I.movmskps (arg i 0) (res i 0)
| SSE (Shuffle_32 n) -> I.shufps n (arg i 1) (res i 0)
| SSE42 Crc32_64 -> I.crc32 (arg i 1) (res i 0)
(* Refutation case: asks the type checker to verify that no other
   [Simd.operation] value can reach this point. *)
| _ -> .

(* Emit an instruction *)
let emit_instr fallthrough i =
emit_debug_info_linear i;
Expand Down Expand Up @@ -1180,8 +1214,40 @@ let emit_instr fallthrough i =
I.cvtsi2sd (arg i 0) (res i 0)
| Lop(Iintoffloat) ->
I.cvttsd2si (arg i 0) (res i 0)
| Lop(Iintofvalue | Ivalueofint) ->
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
move i.arg.(0) i.res.(0)
| Lop(Iscalarcast (V128_of_scalar Float64x2 | V128_to_scalar Float64x2)) ->
I.movsd (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Int64x2 | V128_of_scalar Int64x2)) ->
I.movq (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Int32x4)) ->
I.movd (arg i 0) (res32 i 0)
| Lop(Iscalarcast (V128_of_scalar Int32x4)) ->
I.movd (arg32 i 0) (res i 0)
| Lop(Iscalarcast (V128_of_scalar Float32x4)) ->
(* CR mslater: (SIMD) remove cvt once we have unboxed float32 *)
I.cvtsd2ss (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Float32x4)) ->
(* CR mslater: (SIMD) remove cvt once we have unboxed float32 *)
I.cvtss2sd (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Int16x8)) ->
(* [movw] and [movzx] cannot operate on vector registers.
We must zero extend as the result is an untagged positive int.
CR mslater: (SIMD) remove zx once we have unboxed int16 *)
I.movd (arg i 0) (res32 i 0);
I.movzx (res16 i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Int8x16)) ->
(* [movb] and [movzx] cannot operate on vector registers.
We must zero extend as the result is an untagged positive int.
CR mslater: (SIMD) remove zx once we have unboxed int8 *)
I.movd (arg i 0) (res32 i 0);
I.movzx (res8 i 0) (res i 0)
| Lop(Iscalarcast (V128_of_scalar Int16x8 | V128_of_scalar Int8x16)) ->
(* [movw] and [movb] cannot operate on vector registers.
Moving 32 bits is OK because the argument is an untagged
positive int and these operations leave the top bits of the vector unspecified.
CR mslater: (SIMD) don't load 32 bits once we have unboxed int16/int8 *)
I.movd (arg32 i 0) (res i 0)
| Lop(Iopaque) ->
assert (i.arg.(0).loc = i.res.(0).loc)
| Lop(Ispecific(Ilea addr)) ->
Expand Down Expand Up @@ -1295,9 +1361,8 @@ let emit_instr fallthrough i =
I.sfence ()
| Lop (Ispecific Imfence) ->
I.mfence ()
| Lop (Ispecific Icrc32q) ->
assert (arg i 0 = res i 0);
I.crc32 (arg i 1) (res i 0)
| Lop (Ispecific (Isimd op)) ->
emit_simd_instr op i
| Lop (Ispecific Ipause) ->
I.pause ()
| Lop (Ispecific (Iprefetch { is_write; locality; addr; })) ->
Expand Down Expand Up @@ -1632,8 +1697,8 @@ let make_stack_loc ~offset n (r : Reg.t) =
(match r.typ with
| Int | Val | Addr | Float -> ()
| Vec128 ->
if simd_regalloc_disabled () then
Misc.fatal_error "SIMD register allocation is not enabled.");
if simd_frontend_disabled ()
then Misc.fatal_error "SIMD types are not enabled, but got a Vec128 register.");
Reg.at_location r.typ loc

(* CR mshinwell: Not now, but after code review, it would be better to
Expand Down
Loading

0 comments on commit 8b42e26

Please sign in to comment.