BBalpha_dropout.py
'''
Copyright 2017, Yingzhen Li and Yarin Gal, All rights reserved.
Please consider citing the ICML 2017 paper if using any of this code for your research:
Yingzhen Li and Yarin Gal.
Dropout inference in Bayesian neural networks with alpha-divergences.
International Conference on Machine Learning (ICML), 2017.
'''
from keras import backend as K
from keras.callbacks import Callback
from keras.datasets import mnist
from keras.layers import Input, Dense, Lambda, Activation, Flatten, Convolution2D, MaxPooling2D
from keras.models import Model
from keras.regularizers import l2
from keras.utils import np_utils
import numpy as np
import os, pickle, sys, time
###################################################################
# aux functions
def Dropout_mc(p):
    # dropout that stays active at test time (MC dropout)
    layer = Lambda(lambda x: K.dropout(x, p), output_shape=lambda shape: shape)
    return layer

def Identity(p):
    # no-op layer, used when dropout is disabled
    layer = Lambda(lambda x: x, output_shape=lambda shape: shape)
    return layer

def pW(p):
    # standard dropout approximation at test time: scale activations by (1 - p)
    layer = Lambda(lambda x: x * (1.0 - p), output_shape=lambda shape: shape)
    return layer

def apply_layers(inp, layers):
    # apply a list of layer instances sequentially to an input tensor
    output = inp
    for layer in layers:
        output = layer(output)
    return output
def GenerateMCSamples(inp, layers, K_mc=20):
    if K_mc == 1:
        return apply_layers(inp, layers)
    output_list = []
    for _ in xrange(K_mc):
        output_list += [apply_layers(inp, layers)]  # THIS IS BAD!!! we create new dense layers at every call!!!!
    def pack_out(output_list):
        #output = K.pack(output_list) # K_mc x nb_batch x nb_classes
        output = K.stack(output_list)  # K_mc x nb_batch x nb_classes
        return K.permute_dimensions(output, (1, 0, 2))  # nb_batch x K_mc x nb_classes
    def pack_shape(s):
        s = s[0]
        assert len(s) == 2
        return (s[0], K_mc, s[1])
    out = Lambda(pack_out, output_shape=pack_shape)(output_list)
    return out
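
# --- Hedged usage sketch (added for illustration; not part of the original script).
# Shows how GenerateMCSamples might be wired up: the Dense/Dropout_mc instances below
# are created once and re-applied for every MC sample, so in Keras the samples share
# weights and differ only in their dropout masks. The input dimension, layer sizes
# and dropout rate are illustrative assumptions.
def example_mc_logits_model(K_mc=5, nb_classes=10, input_dim=784):
    inp = Input(shape=(input_dim,))
    layers = [Dropout_mc(0.5), Dense(100, activation='relu'),
              Dropout_mc(0.5), Dense(nb_classes)]
    mc_logits = GenerateMCSamples(inp, layers, K_mc=K_mc)  # nb_batch x K_mc x nb_classes
    return Model(input=inp, output=mc_logits)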
# evaluation for classification tasks
def test_MC_dropout(model, X, Y):
    # expects model.predict to return class probabilities of shape N x K x D;
    # average over the K MC samples before computing accuracy and test log-likelihood
    pred = model.predict(X)  # N x K x D
    pred = np.mean(pred, 1)
    acc = np.mean(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1))
    ll = np.sum(np.log(np.sum(pred * Y, -1)))
    return acc, ll
def logsumexp(x, axis=None):
    # numerically stable log-sum-exp: subtract the max before exponentiating
    x_max = K.max(x, axis=axis, keepdims=True)
    return K.log(K.sum(K.exp(x - x_max), axis=axis, keepdims=True)) + x_max

def bbalpha_softmax_cross_entropy_with_mc_logits(alpha):
    # BB-alpha energy term for classification with MC logits:
    # loss_n = -1/alpha * log( 1/K * sum_k exp(alpha * log p(y_n | x_n, w_k)) )
    alpha = K.cast_to_floatx(alpha)
    def loss(y_true, mc_logits):
        # log(p_ij), p_ij = softmax(logit_ij)
        #assert mc_logits.ndim == 3
        mc_log_softmax = mc_logits - K.max(mc_logits, axis=2, keepdims=True)
        mc_log_softmax = mc_log_softmax - K.log(K.sum(K.exp(mc_log_softmax), axis=2, keepdims=True))
        mc_ll = K.sum(y_true * mc_log_softmax, -1)  # N x K
        K_mc = mc_ll.get_shape().as_list()[1]  # only for tensorflow
        return - 1. / alpha * (logsumexp(alpha * mc_ll, 1) + K.log(1.0 / K_mc))
    return loss
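
# --- Hedged training sketch (illustrative; the optimizer, alpha, epoch and batch
# settings are assumptions, not values from this file). Keras requires y_true to
# match the model output shape N x K_mc x nb_classes, so the one-hot labels are
# repeated along the MC-sample axis before fitting.
def example_compile_and_fit(model, X_train, Y_train, alpha=0.5, K_mc=5):
    model.compile(optimizer='adam',
                  loss=bbalpha_softmax_cross_entropy_with_mc_logits(alpha))
    Y_mc = np.repeat(Y_train[:, np.newaxis, :], K_mc, axis=1)  # N x K_mc x nb_classes
    model.fit(X_train, Y_mc, nb_epoch=1, batch_size=128)
    return model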
###################################################################
# the model
def get_logit_mlp_layers(nb_layers, nb_units, p, wd, nb_classes, layers=None,
                         dropout='none'):
    # avoid a shared mutable default argument: start a fresh layer list per call
    if layers is None:
        layers = []
    if dropout == 'MC':
        D = Dropout_mc
    if dropout == 'pW':
        D = pW
    if dropout == 'none':
        D = Identity
    for _ in xrange(nb_layers):
        layers.append(D(p))
        layers.append(Dense(nb_units, activation='relu', W_regularizer=l2(wd)))
    layers.append(D(p))
    layers.append(Dense(nb_classes, W_regularizer=l2(wd)))
    return layers
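
# --- Hedged end-to-end sketch (illustrative sizes; not part of the original file).
# Builds the MC-dropout MLP from the layer factory above, plus a companion model that
# applies a softmax to each MC sample, since test_MC_dropout expects class
# probabilities of shape N x K x D rather than raw logits. With the TensorFlow
# backend assumed by the loss above, the softmax acts over the last (class) axis.
def example_mlp_models(K_mc=10, nb_classes=10, input_dim=784):
    inp = Input(shape=(input_dim,))
    layers = get_logit_mlp_layers(nb_layers=2, nb_units=100, p=0.5, wd=1e-6,
                                  nb_classes=nb_classes, dropout='MC')
    mc_logits = GenerateMCSamples(inp, layers, K_mc=K_mc)  # N x K_mc x nb_classes
    mc_probs = Activation('softmax')(mc_logits)            # softmax over the class axis
    model_logits = Model(input=inp, output=mc_logits)      # train with the BB-alpha loss
    model_probs = Model(input=inp, output=mc_probs)        # evaluate with test_MC_dropout
    return model_logits, model_probs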
def get_logit_cnn_layers(nb_units, p, wd, nb_classes, layers=None, dropout='none'):
    # avoid a shared mutable default argument: start a fresh layer list per call
    if layers is None:
        layers = []
    # number of convolutional filters to use
    nb_filters = 32
    # size of pooling area for max pooling
    pool_size = (2, 2)
    # convolution kernel size
    kernel_size = (3, 3)
    if dropout == 'MC':
        D = Dropout_mc
    if dropout == 'pW':
        D = pW
    if dropout == 'none':
        D = Identity
    layers.append(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                                border_mode='valid', W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                                W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(MaxPooling2D(pool_size=pool_size))
    layers.append(Flatten())
    layers.append(D(p))
    layers.append(Dense(nb_units, W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(D(p))
    layers.append(Dense(nb_classes, W_regularizer=l2(wd)))
    return layers
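
# --- Hedged CNN sketch (illustrative; the MNIST-like input shape and all sizes are
# assumptions, with channels-last ordering matching the TensorFlow backend assumed
# elsewhere in this file). The same GenerateMCSamples / BB-alpha loss machinery
# applies unchanged to the convolutional layer stack.
def example_cnn_model(K_mc=10, nb_classes=10, input_shape=(28, 28, 1)):
    inp = Input(shape=input_shape)
    layers = get_logit_cnn_layers(nb_units=128, p=0.5, wd=1e-6,
                                  nb_classes=nb_classes, dropout='MC')
    mc_logits = GenerateMCSamples(inp, layers, K_mc=K_mc)  # N x K_mc x nb_classes
    return Model(input=inp, output=mc_logits)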