Commit

Xiaowuhu/190806 (microsoft#318)
* jj

* jj

* kkk
xiaowuhu authored Aug 7, 2019
1 parent 57df3af commit 649af90
Showing 85 changed files with 312 additions and 1,752 deletions.
9 binary files changed (contents not shown).
@@ -10,7 +10,6 @@ def __init__(self, activator):
self.activator = activator

def forward(self, input, train=True):
self.input_shape = input.shape
self.x = input
self.a = self.activator.forward(self.x)
return self.a
@@ -11,7 +11,6 @@ def __init__(self, classifier):
self.classifier = classifier

def forward(self, input, train=True):
self.input_shape = input.shape
self.x = input
self.a = self.classifier.forward(self.x)
return self.a
@@ -63,20 +63,20 @@ def inference(self, X):
output = self.__forward(X, train=False)
return output

def backward(self, X, Y):
def __backward(self, X, Y):
delta_in = self.output - Y
for i in range(self.layer_count-1,-1,-1):
layer = self.layer_list[i]
delta_out = layer.backward(delta_in, i)
# move back to previous layer
delta_in = delta_out

def pre_update(self):
def __pre_update(self):
for i in range(self.layer_count-1,-1,-1):
layer = self.layer_list[i]
layer.pre_update()

def update(self):
def __update(self):
for i in range(self.layer_count-1,-1,-1):
layer = self.layer_list[i]
layer.update()
@@ -100,14 +100,14 @@ def train(self, dataReader, checkpoint=0.1, need_test=True):

# for optimizers that need to pre-update the weights
if self.hp.optimizer_name == OptimizerName.Nag:
self.pre_update()
self.__pre_update()

# get z from x,y
self.__forward(batch_x, train=True)
# calculate gradient of w and b
self.backward(batch_x, batch_y)
self.__backward(batch_x, batch_y)
# final update w,b
self.update()
self.__update()

total_iteration = epoch * max_iteration + iteration
if (total_iteration+1) % checkpoint_iteration == 0:
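The Nag branch above exists because Nesterov momentum evaluates the gradient at a look-ahead position rather than at the current weights, which is why a separate pre-update pass runs before forward/backward. A minimal sketch of that idea, assuming a textbook NAG rule (the framework's own optimizer internals are not shown in this diff):

class NagSketch:
    # illustrative Nesterov-momentum update, not this framework's optimizer
    def __init__(self, lr=0.1, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = 0.0  # velocity

    def pre_update(self, w):
        # shift the weights to the look-ahead point before forward/backward
        return w + self.momentum * self.v

    def update(self, w, grad_at_lookahead):
        # velocity and weight step use the gradient taken at the look-ahead point
        self.v = self.momentum * self.v - self.lr * grad_at_lookahead
        return w + self.v

# toy usage on the quadratic loss 0.5 * w**2, whose gradient is w
opt, w = NagSketch(), 5.0
for _ in range(10):
    w_lookahead = opt.pre_update(w)
    w = opt.update(w, grad_at_lookahead=w_lookahead)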
@@ -144,15 +144,13 @@ def CheckErrorAndLoss(self, dataReader, train_x, train_y, epoch, total_iteration
# calculate train loss
self.__forward(train_x, train=False)
loss_train = self.lossFunc.CheckLoss(self.output, train_y)
loss_train = loss_train# + regular_cost / train_x.shape[0]
accuracy_train = self.__CalAccuracy(self.output, train_y)
print("loss_train=%.6f, accuracy_train=%f" %(loss_train, accuracy_train))

# calculate validation loss
vld_x, vld_y = dataReader.GetValidationSet()
self.__forward(vld_x, train=False)
loss_vld = self.lossFunc.CheckLoss(self.output, vld_y)
loss_vld = loss_vld #+ regular_cost / vld_x.shape[0]
accuracy_vld = self.__CalAccuracy(self.output, vld_y)
print("loss_valid=%.6f, accuracy_valid=%f" %(loss_vld, accuracy_vld))

@@ -191,10 +189,6 @@ def __CalAccuracy(self, a, y):
correct = r.sum()
return correct/m

def inference(self, X):
self.__forward(X, train=False)
return self.output

# save the weights when a lower loss than before is reached
def save_parameters(self):
print("save parameters")
@@ -15,7 +15,7 @@

# Helper class that records the loss values and the corresponding weights / iteration counts
class TrainingHistory_2_4(object):
def __init__(self, need_earlyStop = False, patience = 5):
def __init__(self):
self.loss_train = []
self.accuracy_train = []
self.iteration_seq = []
@@ -89,16 +89,6 @@ def ShowLossHistory(self, title, xcoord, xmin=None, xmax=None, ymin=None, ymax=N
plt.show()
return title

def ShowLossHistory4(self, axes, title, xmin=None, xmax=None, ymin=None, ymax=None):
p2, = axes.plot(self.epoch_seq, self.loss_train)
p1, = axes.plot(self.epoch_seq, self.loss_val)
axes.set_title(title)
axes.set_xlabel("epoch")
axes.set_ylabel("loss")
if xmin != None and ymin != None:
axes.axis([xmin, xmax, ymin, ymax])
return title

def GetEpochNumber(self):
return self.epoch_seq[-1]

@@ -1,15 +1,16 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.

# coding: utf-8

import numpy as np

from MnistImageDataReader import *
from ExtendedDataReader.MnistImageDataReader import *

test_image_file = '../../Data/test-images-10'
test_label_file = '../../Data/test-labels-10'

class MnistAugmentationReader(MnistImageDataReader):
def ReadData(self):
data = np.load("level5_data.npz")
data = np.load("augmentation/data.npz")
image = data["data"]
label = data["label"]
assert(image.shape[0] == label.shape[0])
@@ -25,5 +26,3 @@ def ReadData(self):
if self.mode == "vector":
self.num_feature = 784
self.num_validation = 0
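This reader expects a pre-built augmentation/data.npz with "data" and "label" arrays. A hypothetical sketch, not part of this commit, of producing such a file with a simple 2-pixel shift augmentation:

import os
import numpy as np

# hypothetical generator (not part of this commit) for the augmentation/data.npz
# file loaded above; the "data"/"label" keys match ReadData
def make_augmented_file(x, y, out="augmentation/data.npz"):
    # x: N x 1 x 28 x 28 images, y: N x 1 labels (assumed shapes)
    os.makedirs(os.path.dirname(out), exist_ok=True)
    shifted = np.roll(x, shift=2, axis=3)        # shift each image 2 pixels to the right
    data = np.concatenate([x, shifted], axis=0)  # originals plus shifted copies
    label = np.concatenate([y, y], axis=0)
    np.savez(out, data=data, label=label)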


@@ -1,15 +1,18 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.

# coding: utf-8

import numpy as np

from MnistImageDataReader import *
from ExtendedDataReader.MnistImageDataReader import *

train_image_file_temp = 'ensemble/{0}.npz'
test_image_file = 'test-images-10'
test_label_file = 'test-labels-10'

class MnistBaggingReader(MnistImageDataReader):
def ReadData(self):
data = np.load(self.train_image_file)
def ReadData(self, index):
train_image_file = str.format(train_image_file_temp, index)
data = np.load(train_image_file)
self.XTrainRaw = data["data"]
self.YTrainRaw = data["label"]
self.XTestRaw = self.ReadImageFile(self.test_image_file)
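The new ReadData(index) loads a per-member training file ensemble/{index}.npz, so each bagging member trains on its own resample of the data. A hypothetical sketch, not part of this commit, of how such files could be produced by bootstrap sampling:

import os
import numpy as np

# hypothetical helper (not part of this commit): build one bootstrap resample
# per ensemble member, in the ensemble/{i}.npz layout that ReadData expects
def make_bagging_files(x, y, n_members=9, folder="ensemble"):
    os.makedirs(folder, exist_ok=True)
    n = x.shape[0]
    for i in range(n_members):
        idx = np.random.choice(n, size=n, replace=True)  # sample with replacement
        np.savez(os.path.join(folder, "{0}.npz".format(i)), data=x[idx], label=y[idx])

A member i could then be loaded with something like MnistBaggingReader(mode="vector").ReadData(i), matching the constructor and method signatures shown in this diff.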
@@ -5,28 +5,16 @@

import numpy as np
import struct
from MiniFramework.DataReader import *
from MiniFramework.DataReader_2_0 import *

# XTrainRaw - train data, not normalized
# XTestRaw - test data, not normalized
train_image_file = '../../Data/train-images-10'
train_label_file = '../../Data/train-labels-10'
test_image_file = '../../Data/test-images-10'
test_label_file = '../../Data/test-labels-10'

# YTrainRaw - train label data, not normalized
# YTestRaw - test label data, not normalized

# X - XTrainSet + XDevSet
# XTrainSet - train data normalized, come from XTrainRaw
# XDevSet - validation data, normalized, come from X
# XTestSet - test data, normalized, come from XTestRaw

# Y - YTrainSet + YDevSet
# YTrainSet - train label data normalized, come from YTrainRaw (one-hot, or 0/1)
# YDevSet - validation label data, normalized, come from YTrain
# YTestSet - test label data, normalized, come from YTestRaw (one-hot or 0/1)


class MnistImageDataReader(DataReader):
class MnistImageDataReader(DataReader_2_0):
# mode: "image"=Nx1x28x28, "vector"=1x784
def __init__(self, train_image_file, train_label_file, test_image_file, test_label_file, mode="image"):
def __init__(self, mode="image"):
self.train_image_file = train_image_file
self.train_label_file = train_label_file
self.test_image_file = test_image_file
@@ -49,7 +37,7 @@ def ReadLessData(self, count):
self.YTrainRaw = self.YTrainRaw[0:count]

self.num_example = self.XTrainRaw.shape[0]
self.num_category = len(np.unique(self.YTrainRaw))
self.num_category = (np.unique(self.YTrainRaw)).shape[0]
self.num_test = self.XTestRaw.shape[0]
self.num_train = self.num_example
if self.mode == "vector":
@@ -63,7 +51,7 @@ def ReadData(self):
self.XTestRaw = self.ReadImageFile(self.test_image_file)
self.YTestRaw = self.ReadLabelFile(self.test_label_file)
self.num_example = self.XTrainRaw.shape[0]
self.num_category = len(np.unique(self.YTrainRaw))
self.num_category = (np.unique(self.YTrainRaw)).shape[0]
self.num_test = self.XTestRaw.shape[0]
self.num_train = self.num_example
if self.mode == "vector":
@@ -111,122 +99,69 @@ def ReadLabelFile(self, lable_file_name):
f.close()
return label_data

def Normalize(self):
self.NormalizeX()
self.NormalizeY()

def NormalizeX(self):
if self.XTrainRaw is not None:
self.X = self.__NormalizeData(self.XTrainRaw).astype(np.float32)
if self.XTestRaw is not None:
self.XTestSet = self.__NormalizeData(self.XTestRaw).astype(np.float32)

def NormalizeY(self):
self.Y = self.ToOneHot(self.YTrainRaw)
# no need to one-hot encode the test set; we only need the raw labels [0~9] from it
if self.YTestRaw is not None:
self.YTestSet = self.ToOneHot(self.YTestRaw)

def ToOneHot(self, dataSet):
num = dataSet.shape[0]
Y = np.zeros((num, self.num_category))
for i in range(num):
n = (int)(dataSet[i])
Y[i,n] = 1
# end for
return Y
self.XTrain = self.__NormalizeData(self.XTrainRaw)
self.XTest = self.__NormalizeData(self.XTestRaw)

def __NormalizeData(self, XRawData):
X_NEW = np.zeros(XRawData.shape).astype(np.float32)
X_NEW = np.zeros(XRawData.shape)
x_max = np.max(XRawData)
x_min = np.min(XRawData)
X_NEW = (XRawData - x_min)/(x_max-x_min)
return X_NEW

# this function must be called explicitly to generate the validation set
def GenerateDevSet(self, k = 10):
self.num_validation = (int)(self.num_example / k)
# dev set
self.XDevSet = self.X[0:self.num_validation]
self.YDevSet = self.Y[0:self.num_validation]
# train set
self.XTrainSet = self.X[self.num_validation:]
self.YTrainSet = self.Y[self.num_validation:]

self.num_train = self.num_example - self.num_validation

def GetBatchTrainSamples(self, batch_size, iteration):
start = iteration * batch_size
end = start + batch_size
if self.num_validation == 0:
batch_X = self.X[start:end]
batch_Y = self.Y[start:end]
batch_X = self.XTrain[start:end]
batch_Y = self.YTrain[start:end]
else:
batch_X = self.XTrainSet[start:end]
batch_Y = self.YTrainSet[start:end]
batch_X = self.XTrain[start:end]
batch_Y = self.YTrain[start:end]
# end if

if self.mode == "vector":
return batch_X.reshape(batch_size, -1).T, batch_Y.T
return batch_X.reshape(-1, 784), batch_Y
elif self.mode == "image":
return batch_X, batch_Y.T
return batch_X, batch_Y

# not recommended for use in deep learning
def GetBatchValidationSamples(self, batch_size, iteration):
start = iteration * batch_size
end = start + batch_size
if self.num_validation == 0:
batch_X = self.X[start:end]
batch_Y = self.Y[start:end]
else:
batch_X = self.XDevSet[start:end]
batch_Y = self.YDevSet[start:end]
# end if
def GetValidationSet(self):
batch_X = self.XDev
batch_Y = self.YDev
if self.mode == "vector":
return batch_X.reshape(batch_size, -1).T, batch_Y.T
return batch_X.reshape(self.num_validation, -1), batch_Y
elif self.mode == "image":
return batch_X, batch_Y.T
return batch_X, batch_Y

def GetDevSet(self):
batch_X = self.XDevSet
batch_Y = self.YDevSet
def GetTestSet(self):
if self.mode == "vector":
return batch_X.reshape(self.num_validation, -1).T, batch_Y.T
return self.XTest.reshape(self.num_test,-1), self.YTest
elif self.mode == "image":
return batch_X, batch_Y.T

return self.XTest, self.YTest

def GetBatchTestSamples(self, batch_size, iteration):
start = iteration * batch_size
end = start + batch_size
batch_X = self.XTestSet[start:end]
batch_Y = self.YTestSet[start:end]
batch_X = self.XTest[start:end]
batch_Y = self.YTest[start:end]

if self.mode == "vector":
return batch_X.reshape(batch_size, -1).T, batch_Y.T
return batch_X.reshape(batch_size, -1), batch_Y
elif self.mode == "image":
return batch_X, batch_Y.T
return batch_X, batch_Y

# permutation only affects the first axis, so the array needs to be transposed first
# see the class comment to understand the data format
# it is suggested to call this function once per epoch
def Shuffle(self):
if self.num_validation == 0:
seed = np.random.randint(0,100)
np.random.seed(seed)
XP = np.random.permutation(self.X)
np.random.seed(seed)
YP = np.random.permutation(self.Y)
self.X = XP
self.Y = YP
return self.X, self.Y
else:
seed = np.random.randint(0,100)
np.random.seed(seed)
XP = np.random.permutation(self.XTrainSet)
np.random.seed(seed)
YP = np.random.permutation(self.YTrainSet)
self.XTrainSet = XP
self.YTrainSet = YP
return self.XTrainSet, self.YTrainSet
seed = np.random.randint(0,100)
np.random.seed(seed)
XP = np.random.permutation(self.XTrain)
np.random.seed(seed)
YP = np.random.permutation(self.YTrain)
self.XTrain = XP
self.YTrain = YP
return self.XTrain, self.YTrain
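Shuffle keeps XTrain and YTrain aligned by reseeding the generator before each permutation. An equivalent and slightly simpler pattern, shown here only as a sketch and not part of this commit, permutes a single index array instead:

import numpy as np

def shuffle_pair(x, y):
    # one shared index permutation keeps samples and labels aligned
    idx = np.random.permutation(x.shape[0])
    return x[idx], y[idx]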

@@ -0,0 +1 @@

(remaining changed files not shown)

