CNN_regression.py
'''
from utils import Model
import numpy as np
musinsa_regression = Model()
# Train on the data in the musinsa_data folder and print the results
musinsa_regression.train()
# Predict page views for an image
print(musinsa_regression.predict(np.array([])))
'''
from PIL import Image
import os, glob
import numpy as np
import pandas as pd
from tensorflow.keras.utils import HDF5Matrix
import h5py
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
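# NOTE: HDF5Matrix was deprecated in TF 2.1 and dropped in later releases,
# so this script assumes an older TensorFlow. On newer versions, a rough
# substitute (a sketch, not part of the original) is a plain generator over
# the HDF5 dataset:
#
#   def hdf5_images(path, key='image', start=0, end=None):
#       with h5py.File(path, 'r') as f:
#           stop = len(f[key]) if end is None else end
#           for i in range(start, stop):
#               yield f[key][i]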
# ResNet50 backbone with a single linear unit on top for regression
def create_CNN():
    base = ResNet50(weights=None, include_top=False, input_shape=(224, 224, 3))
    fc = Flatten()(base.output)
    #fc = Dense(units=1000, activation='relu', kernel_initializer='he_normal')(fc)
    fc = Dense(units=1, activation='linear', kernel_initializer='he_normal')(fc)
    model = Model(inputs=base.inputs, outputs=fc)
    # MAE loss is less sensitive to outliers than MSE
    model.compile(loss='mae',
                  optimizer='adam',
                  metrics=['mae', 'mse'])
    model.summary()
    return model
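# A variant worth knowing (a sketch, not in the original): initializing the
# backbone from ImageNet and freezing it typically converges much faster on
# small datasets than training from scratch with weights=None:
#
#   base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
#   base.trainable = False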
def train(train_images, train_labels, valid_images, valid_labels):
    # PARAMETERS
    epoch = 5
    batch_size = 32
    early = EarlyStopping(monitor='val_mae', min_delta=0, patience=10, verbose=1, mode='auto')
    # BUILD YOUR MODEL
    model = create_CNN()
    # shuffle='batch' shuffles within batch-sized chunks, as required for
    # HDF5-backed inputs that must be read sequentially
    hist = model.fit(train_images, train_labels, epochs=epoch, batch_size=batch_size, shuffle='batch',
                     validation_data=(valid_images, valid_labels), verbose=1,
                     callbacks=[early])
    # Plot training vs. validation loss (item_path is defined in the
    # __main__ block below)
    plt.plot(hist.history['loss'], 'b-', label="training")
    plt.plot(hist.history['val_loss'], 'r:', label="validation")
    plt.legend()
    plt.savefig(os.path.join(item_path, 'loss.png'), dpi=300)
    plt.show()
    # Scatter the validation labels against the model's predictions
    valid_pred = model.predict(valid_images)
    pred_labels = valid_labels
    # index = np.argsort(pred_labels)
    index = np.arange(valid_pred.shape[0])
    plt.scatter(index, pred_labels, c='b', s=0.1, label='test label')
    plt.scatter(index, valid_pred, c='r', s=0.1, label='predict')
    plt.legend()
    plt.savefig(os.path.join(item_path, 'predict.png'), dpi=300)
    plt.show()
    # Save the trained model for reuse
    h5_file = os.path.join(item_path, 'model.h5')
    model.save(h5_file)
    return model
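# Example reuse (a sketch, not in the original): reload the saved model and
# score new 224x224 RGB images; `new_images` is a hypothetical array of
# shape (n, 224, 224, 3) scaled like the training data.
#
#   model = load_model(os.path.join(item_path, 'model.h5'))
#   preds = model.predict(new_images)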
if __name__ == '__main__':
    # Set up file paths
    current_dir = os.path.join(os.getcwd(), './')
    item_path = os.path.join(current_dir, 'temp/')
    item_file = os.path.join(item_path, 'items.csv')
    item_csv = pd.read_csv(item_file, encoding='utf-8').fillna('0')
    # Drop rows whose category string is malformed (no '>' separator)
    item_csv = item_csv.drop(item_csv.loc[item_csv['category'].str.find('>') == -1].index)
    # korean to index
    #item_csv['category'] = item_list[i]
    image_file = 'images.hdf5'
    '''
    # shuffle
    index = np.argsort(Y)[:-1]
    np.random.shuffle(index)
    #rand_index = np.random.permutation(num_X)
    X = X[index] / 255.0
    # Make the target distribution closer to linear
    Y = np.log(Y[index] + 1)
    Y = (Y - Y.min()) / (Y.max() - Y.min())
    #Y = np.arctan(Y - Y.mean())
    '''
    with h5py.File(item_path + image_file, 'r') as f:
        num_X = len(f['image'])
        # random index
        #index = np.random.permutation(num_X)
        # Keep only the labels whose item id has an image in the HDF5 file
        Y = item_csv[item_csv['id'].isin(f['index'])]['pageView'].astype(int).to_numpy()
        # Compress the heavy-tailed page-view counts, then min-max scale to [0, 1]
        Y = np.log(Y + 1)
        Y = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))
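    # To map a prediction back to a raw page-view count, invert the two
    # transforms above (a sketch; y_min and y_max would need to be captured
    # as Y.min() and Y.max() after the log step but before scaling):
    #   pageview = np.exp(pred * (y_max - y_min) + y_min) - 1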
    # 90/10 train/validation split over the HDF5-backed images
    train_images = HDF5Matrix(item_path + image_file, 'image', end=int(num_X * 0.9))
    train_labels = Y[:int(num_X * 0.9)]
    valid_images = HDF5Matrix(item_path + image_file, 'image', start=int(num_X * 0.9))
    valid_labels = Y[int(num_X * 0.9):]
    a = train(train_images, train_labels, valid_images, valid_labels)