forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnumadaboostingtarget.go
115 lines (101 loc) · 3.16 KB
/
numadaboostingtarget.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package CloudForest
import (
"math"
)
/*
NumAdaBoostTarget wraps a numerical feature as a target for use in (Experimental) Adaptive Boosting
Regression.
*/
type NumAdaBoostTarget struct {
// NumFeature is the embedded numerical feature being wrapped as the target.
NumFeature
// Weights holds one weight per case, indexed by case number.
// NewNumAdaBoostTarget initialises every entry to 1/nCases and Boost
// updates and rescales them.
Weights []float64
// NormFactor is the reference error that Boost compares a tree's error
// against. NewNumAdaBoostTarget sets it to the impurity of all cases
// multiplied by the number of cases.
NormFactor float64
}
// NewNumAdaBoostTarget wraps f in a NumAdaBoostTarget. Every case starts with
// the same weight, 1/f.Length(), and NormFactor is set to the impurity of the
// full set of cases (under those uniform weights) multiplied by the number of
// cases.
func NewNumAdaBoostTarget(f NumFeature) (abt *NumAdaBoostTarget) {
	n := f.Length()
	uniform := 1 / float64(n)

	// Build the uniform weight vector and the list of all case indices together.
	weights := make([]float64, n)
	all := make([]int, n)
	for i := 0; i < n; i++ {
		weights[i] = uniform
		all[i] = i
	}

	abt = &NumAdaBoostTarget{NumFeature: f, Weights: weights, NormFactor: 0.0}
	// Impurity reads abt.Weights, so it can only be evaluated once abt exists.
	abt.NormFactor = abt.Impurity(&all, nil) * float64(n)
	return abt
}
/*
SplitImpurity is the AdaBoosting version of SplitImpurity. It returns the
average of the weighted impurities (see Impurity) of the left, right and,
when m is non-nil and non-empty, missing partitions, each scaled by the number
of cases in that partition and divided by the total number of cases.
*/
func (target *NumAdaBoostTarget) SplitImpurity(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs) (impurityDecrease float64) {
	leftN, rightN := float64(len(*l)), float64(len(*r))
	missingN := 0.0

	// Accumulate the size-scaled impurity of each partition in turn.
	scaled := leftN * target.Impurity(l, allocs.LCounter)
	scaled += rightN * target.Impurity(r, allocs.RCounter)
	if m != nil && len(*m) > 0 {
		missingN = float64(len(*m))
		scaled += missingN * target.Impurity(m, allocs.Counter)
	}

	return scaled / (leftN + rightN + missingN)
}
// UpdateSImpFromAllocs will be called when splits are being built by moving
// cases from r to l, as in learning from numerical variables.
// Here it simply delegates to SplitImpurity and does not use movedRtoL; it
// could be implemented to provide further optimization.
func (target *NumAdaBoostTarget) UpdateSImpFromAllocs(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs, movedRtoL *[]int) (impurityDecrease float64) {
	impurityDecrease = target.SplitImpurity(l, r, m, allocs)
	return impurityDecrease
}
// Impurity is the AdaBoosting impurity: the sum, over the non-missing cases,
// of each case's entry in target.Weights multiplied by its Norm relative to
// the value predicted for the whole set of cases. The counter argument is not
// used.
func (target *NumAdaBoostTarget) Impurity(cases *[]int, counter *[]int) (e float64) {
	predicted := target.Predicted(cases)

	var total float64
	for _, idx := range *cases {
		if target.IsMissing(idx) {
			// Missing cases contribute nothing to the error.
			continue
		}
		total += target.Weights[idx] * target.Norm(idx, predicted)
	}
	return total
}
// Boost performs (experimental) numerical adaptive boosting using the
// partition of cases given in leaves and returns the weight that the tree
// which generated the partition should be given.
//
// The tree's error, imp, is the sum of the weighted impurities (see Impurity)
// of its leaves. An empty partition, or a tree whose error is greater than
// NormFactor, is given zero weight and leaves the case weights untouched.
// Other trees have their weight set to:
//
//	weight = math.Log(t.NormFactor / imp)
//
// and the weight of every non-missing case c, in a leaf with predicted value
// m, is updated to:
//
//	t.Weights[c] = t.Weights[c] * math.Exp(weight*(t.Norm(c, m)-imp))
//
// after which all weights are rescaled so that they sum to one.
//
// A tree with zero error returns +Inf (or 0 when NormFactor is also zero)
// and leaves the case weights untouched: with an infinite tree weight the
// update above evaluates math.Exp(Inf*0), which is NaN and would poison every
// weight for all later boosting rounds.
//
// These functions are chosen to provide a rough analog to categorical adaptive
// boosting for numerical data with unbounded error.
func (t *NumAdaBoostTarget) Boost(leaves *[][]int) (weight float64) {
	if len(*leaves) == 0 {
		return 0.0
	}

	// Total weighted error of the tree over all of its leaves.
	imp := 0.0
	for _, cases := range *leaves {
		imp += t.Impurity(&cases, nil)
	}

	norm := t.NormFactor
	if imp > norm {
		return 0.0
	}

	if imp == 0 {
		// Perfect fit: math.Log(norm/imp) is not finite, so skip the
		// reweighting rather than writing NaN into t.Weights.
		if norm == 0 {
			// 0/0: the tree is exactly as good as the baseline, which is the
			// imp == norm case where the formula yields a weight of zero.
			return 0.0
		}
		return math.Inf(1)
	}

	weight = math.Log(norm / imp)

	// Shift weight toward cases whose error is above the tree's overall error.
	for _, cases := range *leaves {
		m := t.Predicted(&cases)
		for _, c := range cases {
			if !t.IsMissing(c) {
				t.Weights[c] *= math.Exp(weight * (t.Norm(c, m) - imp))
			}
		}
	}

	// Rescale so the weights sum to one again.
	total := 0.0
	for _, v := range t.Weights {
		total += v
	}
	if total == 0 {
		// Nothing to rescale; dividing would turn every weight into NaN.
		return weight
	}
	for i := range t.Weights {
		t.Weights[i] /= total
	}
	return weight
}