forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnumballotbox.go
120 lines (98 loc) · 2.67 KB
/
numballotbox.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package CloudForest
import (
"fmt"
"log"
"math"
"strconv"
)
// NumBallotBox keeps track of the numerical votes cast by trees, holding one
// RunningMean accumulator per case.
//
// The original author notes that voting is thread safe. That guarantee rests
// on RunningMean, which is not shown in this file — confirm there.
type NumBallotBox struct {
	// box holds one accumulator per case and is indexed by case number.
	box []*RunningMean
}
// NewNumBallotBox returns a ballot box for "size" cases, with a freshly
// allocated RunningMean in every slot.
func NewNumBallotBox(size int) *NumBallotBox {
	means := make([]*RunningMean, size)
	for idx := range means {
		means[idx] = new(RunningMean)
	}
	return &NumBallotBox{box: means}
}
// Vote records a weighted vote for case casei. pred is parsed as a 64-bit
// float and handed, together with weight, to that case's RunningMean.
// A pred that does not parse is dropped without any error being reported.
func (bb *NumBallotBox) Vote(casei int, pred string, weight float64) {
	value, parseErr := strconv.ParseFloat(pred, 64)
	if parseErr != nil {
		// Not a number: deliberately ignore this vote.
		return
	}
	bb.box[casei].WeightedAdd(value, weight)
}
// TallyNum tallies the votes for case i as a numerical value: it returns the
// mean reported by that case's RunningMean and discards the count.
func (bb *NumBallotBox) TallyNum(i int) (predicted float64) {
	mean, _ := bb.box[i].Read()
	return mean
}
// Tally tallies the votes for case i as a string. When the case's RunningMean
// reports a count above zero the mean is formatted with "%v"; otherwise the
// result is "NA".
func (bb *NumBallotBox) Tally(i int) (predicted string) {
	mean, votes := bb.box[i].Read()
	if votes > 0 {
		return fmt.Sprintf("%v", mean)
	}
	return "NA"
}
// TallySquaredError returns the mean squared error of the tallied
// predictions against the values of the provided feature.
//
// feature must be a NumFeature (the type assertion panics otherwise) and must
// use the same case indexing as the ballot box. A case is left out of the
// average when its feature value is missing or its tallied prediction is NaN.
//
// If no case can be scored, the process is terminated through log.Fatal.
//
// Original author's note: Gini impurity is not used, so this is not for use
// in rf implementations.
//
// NOTE(review): a case that never received a vote is scored with whatever
// TallyNum returns for it, while Tally reports such a case as "NA" — confirm
// whether unvoted cases should be skipped here as well.
func (bb *NumBallotBox) TallySquaredError(feature Feature) (e float64) {
	// Assert once up front instead of on every iteration.
	target := feature.(NumFeature)

	scored := 0
	for i := 0; i < feature.Length(); i++ {
		predicted := bb.TallyNum(i)
		if feature.IsMissing(i) || math.IsNaN(predicted) {
			continue
		}
		d := float64(target.Get(i)) - predicted
		e += d * d
		scored++
	}
	if scored == 0 {
		log.Fatal("TallyError with 0 count!")
	}
	return e / float64(scored)
}
// TallyError returns the mean squared error of the tallied predictions (see
// TallySquaredError) divided by the variance of the non-missing values of
// feature, i.e. the fraction of the data variance left unexplained.
//
// feature must be a NumFeature; the type assertion panics otherwise.
//
// No guard is applied to the divisions: if every value is missing the mean
// and variance are NaN, and if the non-missing values have zero variance the
// result is +Inf or NaN.
func (bb *NumBallotBox) TallyError(feature Feature) (e float64) {
	// Both passes go through the NumFeature interface. The first pass used to
	// assert the concrete *DenseNumFeature type, which panicked for any other
	// NumFeature implementation even though the second pass accepted it.
	target := feature.(NumFeature)

	// First pass: mean of the non-missing values.
	sum := 0.0
	total := 0
	for i := 0; i < feature.Length(); i++ {
		if !feature.IsMissing(i) {
			sum += float64(target.Get(i))
			total++
		}
	}
	mean := sum / float64(total)

	// Second pass: variance of the non-missing values around that mean.
	variance := 0.0
	for i := 0; i < feature.Length(); i++ {
		if !feature.IsMissing(i) {
			d := float64(target.Get(i)) - mean
			variance += d * d
		}
	}
	variance /= float64(total)

	return bb.TallySquaredError(feature) / variance
}
// TallyR2Score returns the R2 score (coefficient of determination) of the
// tallied predictions against feature, computed as one minus TallyError.
func (bb *NumBallotBox) TallyR2Score(feature Feature) (e float64) {
	return 1 - bb.TallyError(feature)
}