-
Notifications
You must be signed in to change notification settings - Fork 0
/
datafile.ml
60 lines (46 loc) · 5.46 KB
/
datafile.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
open Batteries_uni
open Bigarray
open Unix
(* Size of the first dimension passed to every [Array2.map_file] call below. *)
let cols = 900
(* Bigarray element kind for the single-precision mappings ([get_matrix], [write]). *)
let kind = float32
(* Bigarray element kind for the double-precision mappings ([get_matrix64], [write64]). *)
let kind64 = float64
(* Memory layout shared by all big arrays created below. *)
let layout = fortran_layout
(* Map the file [fn] read-only (private mapping, [shared = false]) as a
   single-precision matrix whose first dimension is [cols]; the second
   dimension is passed as [-1] so that it is deduced from the file size.

   The file descriptor is closed before returning, including when
   [map_file] raises: the mapping stays valid after the descriptor is
   closed, so keeping it open would only leak one descriptor per call. *)
let get_matrix fn =
  let fh = openfile fn [O_RDONLY] 0o755 in
  let mapped =
    try Array2.map_file fh kind layout false cols (-1)
    with e -> (Unix.close fh; raise e)
  in
  Unix.close fh;
  mapped
(* Open (creating it if needed) [output_file] and map it as a shared,
   writable single-precision matrix of dimensions [cols] x [rows].
   Stores into the returned big array go to the file because the mapping
   is created with [shared = true].

   The file descriptor is closed before returning, including when
   [map_file] raises: the mapping stays valid after the descriptor is
   closed, so keeping it open would only leak one descriptor per call. *)
let write rows output_file =
  let fh = openfile output_file [O_RDWR;O_CREAT] 0o755 in
  let mapped =
    try Array2.map_file fh kind layout true cols rows
    with e -> (Unix.close fh; raise e)
  in
  Unix.close fh;
  mapped
(* Map the file [fn] read-only (private mapping, [shared = false]) as a
   double-precision matrix whose first dimension is [cols]; the second
   dimension is passed as [-1] so that it is deduced from the file size.

   The file descriptor is closed before returning, including when
   [map_file] raises: the mapping stays valid after the descriptor is
   closed, so keeping it open would only leak one descriptor per call. *)
let get_matrix64 fn =
  let fh = openfile fn [O_RDONLY] 0o755 in
  let mapped =
    try Array2.map_file fh kind64 layout false cols (-1)
    with e -> (Unix.close fh; raise e)
  in
  Unix.close fh;
  mapped
(* Open (creating it if needed) [output_file] and map it as a shared,
   writable double-precision matrix of dimensions [cols] x [rows].
   Stores into the returned big array go to the file because the mapping
   is created with [shared = true].

   The file descriptor is closed before returning, including when
   [map_file] raises: the mapping stays valid after the descriptor is
   closed, so keeping it open would only leak one descriptor per call. *)
let write64 rows output_file =
  let fh = openfile output_file [O_RDWR;O_CREAT] 0o755 in
  let mapped =
    try Array2.map_file fh kind64 layout true cols rows
    with e -> (Unix.close fh; raise e)
  in
  Unix.close fh;
  mapped
(* Read the text file [fn] lazily, yielding one [float list] per line.
   Each line is split on single spaces and every field is parsed with
   [float_of_string].

   Empty fields (produced by consecutive, leading or trailing spaces) are
   dropped before parsing; without this, [float_of_string ""] would raise
   on otherwise well-formed lines.  A field that is not a valid float
   still raises when its line is consumed. *)
let read_text fn =
  let parse_line line =
    String.nsplit line " "
    |> List.filter (fun field -> field <> "")
    |> List.map float_of_string
  in
  File.lines_of fn |> Enum.map parse_line
(* Load the file [fn] as a one-dimensional big array of unsigned 8-bit
   integers: every line is parsed with [int_of_string] and the resulting
   values are stored, in file order, using the shared [layout]. *)
let read_label_file fn =
  let labels = File.lines_of fn |> Enum.map int_of_string |> Array.of_enum in
  Array1.of_array int8_unsigned layout labels
(* Table of scale factors, written as integers and converted to a float
   array.  [check_data] multiplies each packed vector element-wise by this
   array before comparing it with the corresponding text values.
   NOTE(review): presumably there is one factor per entry of a packed
   vector (i.e. [cols] of them) -- TODO confirm the element count. *)
let scaling = [54; 75; 28; 386; 52; 43; 289; 113; 44; 74; 22; 45; 76; 37; 47; 21; 37; 79; 15; 87; 57; 64; 275; 46; 40; 77; 24; 76; 85; 59; 72; 18; 42; 19; 201; 66; 71; 100; 36; 44; 31; 27; 49; 87; 119; 39; 40; 80; 52; 38; 399; 115; 52; 19; 75; 38; 42; 72; 404; 96; 163; 20; 65; 230; 43; 822; 38; 85; 60; 50; 83; 41; 26; 17; 30; 66; 34; 17; 57; 49; 40; 88; 83; 138; 48; 128; 398; 33; 38; 87; 118; 40; 41; 101; 39; 75; 101; 265; 17; 89; 96; 48; 334; 31; 77; 43; 61; 132; 30; 55; 45; 115; 241; 81; 52; 29; 16; 15; 419; 24; 108; 87; 45; 213; 72; 143; 80; 87; 87; 52; 71; 17; 29; 37; 43; 64; 27; 48; 100; 39; 138; 234; 49; 94; 21; 70; 595; 20; 97; 56; 45; 33; 22; 17; 79; 16; 54; 27; 140; 31; 19; 33; 99; 476; 22; 58; 34; 13; 47; 31; 404; 52; 67; 292; 21; 101; 16; 53; 24; 313; 35; 118; 157; 34; 45; 218; 27; 63; 57; 26; 429; 52; 77; 268; 54; 35; 24; 41; 40; 22; 81; 16; 52; 29; 73; 43; 40; 72; 79; 22; 19; 495; 27; 40; 75; 56; 49; 69; 66; 59; 27; 62; 87; 35; 107; 36; 65; 33; 37; 63; 96; 27; 26; 17; 95; 37; 19; 24; 52; 63; 24; 88; 63; 33; 29; 95; 52; 60; 56; 112; 60; 78; 24; 63; 202; 21; 46; 58; 48; 78; 39; 48; 74; 142; 21; 578; 48; 95; 74; 70; 77; 54; 55; 77; 114; 28; 111; 23; 64; 71; 23; 61; 52; 26; 21; 26; 98; 59; 274; 38; 28; 37; 40; 152; 22; 51; 24; 58; 53; 32; 274; 130; 31; 34; 24; 318; 71; 31; 69; 56; 42; 49; 135; 65; 22; 73; 31; 30; 62; 55; 70; 59; 98; 79; 39; 62; 217; 23; 40; 112; 77; 45; 77; 28; 23; 549; 265; 29; 56; 106; 28; 46; 48; 45; 51; 510; 71; 83; 21; 52; 36; 26; 26; 104; 27; 57; 51; 56; 25; 77; 32; 116; 45; 166; 84; 48; 32; 48; 56; 52; 46; 42; 24; 48; 64; 21; 88; 55; 43; 54; 342; 44; 51; 147; 177; 52; 21; 31; 56; 70; 26; 144; 156; 23; 67; 108; 60; 28; 20; 101; 22; 89; 51; 106; 74; 310; 72; 19; 56; 50; 61; 66; 47; 23; 190; 36; 38; 43; 66; 27; 210; 18; 59; 32; 73; 87; 34; 31; 45; 65; 102; 661; 38; 47; 20; 27; 44; 36; 60; 90; 24; 167; 99; 46; 38; 314; 130; 55; 38; 49; 106; 114; 57; 94; 213; 120; 28; 84; 39; 77; 65; 55; 32; 39; 53; 45; 468; 43; 94; 87; 59; 31; 27; 76; 
58; 460; 46; 39; 19; 28; 398; 87; 70; 95; 94; 36; 441; 63; 45; 18; 23; 95; 110; 100; 48; 62; 45; 84; 56; 126; 68; 49; 75; 78; 60; 25; 90; 21; 59; 110; 444; 75; 33; 39; 41; 40; 43; 50; 52; 105; 42; 43; 24; 204; 46; 38; 31; 28; 74; 49; 52; 36; 32; 49; 85; 34; 82; 71; 71; 24; 54; 47; 138; 89; 45; 53; 28; 115; 61; 199; 70; 40; 72; 52; 26; 46; 177; 55; 28; 56; 53; 43; 161; 81; 58; 59; 40; 128; 72; 31; 75; 19; 22; 50; 81; 38; 53; 26; 34; 24; 25; 88; 148; 74; 44; 44; 15; 20; 53; 22; 80; 33; 51; 16; 65; 70; 61; 21; 32; 344; 54; 173; 44; 64; 45; 102; 21; 59; 53; 492; 42; 47; 116; 32; 45; 302; 121; 129; 63; 190; 55; 132; 72; 27; 29; 16; 40; 104; 15; 51; 73; 84; 71; 395; 33; 49; 60; 52; 29; 33; 31; 426; 307; 45; 33; 44; 66; 76; 65; 21; 88; 69; 43; 75; 24; 338; 121; 38; 386; 88; 751; 58; 62; 66; 23; 73; 93; 76; 21; 33; 14; 43; 73; 93; 55; 36; 84; 22; 27; 93; 21; 48; 56; 775; 563; 726; 58; 27; 36; 50; 57; 227; 33; 93; 39; 33; 87; 150; 40; 46; 22; 36; 139; 35; 55; 55; 84; 31; 24; 82; 53; 56; 107; 26; 115; 412; 29; 100; 53; 335; 54; 118; 49; 125; 63; 88; 52; 37; 111; 174; 29; 45; 90; 46; 32; 35; 226; 82; 21; 93; 21; 72; 20; 50; 48; 52; 100; 908; 56; 62; 23; 46; 35; 15; 371; 73; 207; 54; 83; 25; 57; 73; 91; 62; 21; 43; 35; 25; 27; 48; 60; 127; 52; 16; 27; 62; 106; 91; 539; 16; 16; 52; 35; 61; 83; 35; 30; 25; 53; 33; 174; 92; 68; 62; 59; 32; 66; 108; 18; 36; 15; 209; 17; 22; 21; 14; 41; 78; 279; 29; 104; 60; 56; 70; 81; 54; 36; 57; 16; 176; 61; 36; 109; 29; 383; 98; 24; 31; 131; 47; 52; 47; 50; 122; 19; 730; 48; 40; 40; 45; 44; 67; 62; 37; 482; 78; 44; 35; 10; 36; 48; 49; 47; 44; 33; 123; 91; 46; 63; 48; 82; 33; 478; 60; 80; 53; 118; 27; 71; 21; 29; 63; 89; 35; 66; 54; 23; 109; 32; 32; 325; 124; 23; 77; 26; 38; 40; 382; 63; 151; 60; 20; 60; 17; 27; 27; 15; 22; 41; 44] |> List.map float |> Array.of_list
open Lacaml.Impl.S (* Single-precision reals *)
(* Printer for a single float: writes [x] to the output [oc] with exactly
   three digits after the decimal point.  Shaped so it can be handed to
   [Array.print] as the element printer. *)
let print_float = fun oc x -> Printf.fprintf oc "%.3f" x
(* Check data: verify the packed single-precision matrix stored in
   [train_file] against the reference text dump.

   The text file is read with [read_text], one float array per line.  The
   packed file is memory-mapped with first dimension [cols] and walked one
   [slice_right] vector at a time.  Vector [i] of the packed file, once
   multiplied element-wise by [scaling], must match line [i] of the text
   file to within [epsilon] on every component.

   Every mismatching row is reported on stdout (rows are numbered from 0).
   "Data verified" is printed only when no row mismatched; otherwise a
   summary with the number of failing rows is printed instead.

   Notes:
   - the reference path is hard-coded below;
   - [Enum.iter2i] walks the two enumerations pairwise, so a difference in
     row counts is presumably not detected -- TODO confirm and decide
     whether that should be an error. *)
let check_data train_file =
  let read fn =
    let fh = openfile fn [O_RDONLY] 0o755 in
    let mmap =
      try Array2.map_file fh kind layout false cols (-1)
      with e -> (Unix.close fh; raise e)
    in
    (* The mapping outlives the descriptor, so release it right away
       instead of leaking it. *)
    Unix.close fh;
    let get_row i = Array2.slice_right mmap i in
    (* Fortran layout: valid indices run from 1 to [dim2] inclusive. *)
    (1--Array2.dim2 mmap) |> Enum.map get_row
  in
  let d0 = read_text "/home/thelema/development.txt" |> Enum.map Array.of_list in
  let d1 = read train_file |> Enum.map Vec.to_array in
  let epsilon = 0.0001 in
  let float_eq a b = abs_float (a -. b) < epsilon in
  (* Number of rows that failed the comparison. *)
  let failures = ref 0 in
  let eq row l1 l2 =
    if Array.map2 ( *. ) l2 scaling |> Array.for_all2 float_eq l1 then () else (
      incr failures;
      Printf.printf "Verification failed on row %d:\ntext:%a\npack:%a\n" row
        (Array.print print_float) l1 (Array.print print_float) l2
    )
  in
  Enum.iter2i eq d0 d1;
  if !failures = 0 then Printf.printf "Data verified\n"
  else Printf.printf "Data verification failed on %d row(s)\n" !failures