
Commit a0e69ab

committed
ex1, ex3 and ex4
1 parent c3ce256 commit a0e69ab

File tree

5 files changed: +266 -33 lines changed


classification.py: +74 -7
@@ -1,24 +1,29 @@
 import numpy as np
 
-from sklearn.metrics.pairwise import euclidian_distance
 
 def linear_regression(X, Y):
     """
     Fit linear regression model on X, Y
     """
     # Linear Regression
+    # append a constant column so the intercept is fitted with the slopes
+    X_2 = np.ones((X.shape[0], X.shape[1] + 1))
+    X_2[:, :2] = X
+    X = X_2
+
     a = np.linalg.inv(np.dot(X.T, X))
     b = np.dot(X.T, Y)
     beta = np.dot(a, b)
 
-    u = (Y - np.dot(X, beta)).mean()
-    return beta, u
+    # slope coefficients and intercept of the fitted affine model
+    return beta[:2], beta[2]
 
 
 def logistic_regression(X, Y, max_iter=500, ridge=1e-10, verbose=True):
     """
     Compute logistic regression
     """
+    # same augmentation: the last column of X carries the intercept
+    X_2 = np.ones((X.shape[0], X.shape[1] + 1))
+    X_2[:, :2] = X
+    X = X_2
     W = np.ones((X.shape[0], 1))
     theta = np.zeros((X.shape[1], 1))
 
@@ -35,12 +40,74 @@ def logistic_regression(X, Y, max_iter=500, ridge=1e-10, verbose=True):
             if verbose:
                 print "got out at iteration", iter
             break
+    # split theta into slope coefficients and intercept
+    beta = theta[:2]
+    u = theta[2]
+    return beta, u
+
+
+def logistic_regression_predict(X, theta, gamma):
+    """
+    Predict the label for X from the logit theta . x + gamma
+    """
+    p = 1. / (1 + np.exp(-np.dot(X, theta) - gamma))
+    Y = p > 0.5
+    return Y.astype(int)
+
+
+def linear_regression_predict(X, theta, gamma):
+    """
+    Predict the label for X with a linear regression model,
+    thresholding the raw prediction at 0.5
+    """
+    p = np.dot(X, theta) + gamma
+    Y = p > 0.5
+    return Y.astype(int)
+
+
+def LDA(X, Y):
+    n = Y.shape[0]
+    p = Y.sum() / n
+    m_1 = (Y * X).sum(axis=0) / Y.sum()
+    m_0 = ((1 - Y) * X).sum(axis=0) / (1 - Y).sum()
+
+    # pooled within-class covariance
+    a = np.dot((X - m_1).T, (Y * (X - m_1)))
+    b = np.dot((X - m_0).T, ((1 - Y) * (X - m_0)))
+    S = 1. / n * (a + b)
+
+    S_inv = np.linalg.inv(S)
+
+    beta = np.dot(S_inv, (m_1 - m_0))
+
+    # intercept: -1/2 (m_1 + m_0)' S^-1 (m_1 - m_0) + log-odds of the prior
+    c = np.dot(np.dot((m_1 + m_0).T, S_inv), m_1 - m_0)
+    d = np.log(p / (1 - p))
+    gamma = -1. / 2 * c + d
+    return beta, gamma
+
+
+def QDA(X, Y):
+    n = Y.shape[0]
+    p = Y.sum() / n
+    m_1 = (Y * X).sum(axis=0) / Y.sum()
+    m_0 = ((1 - Y) * X).sum(axis=0) / (1 - Y).sum()
+
+    # per-class covariance matrices (LDA's pooled S becomes S_1 and S_0)
+    S_1 = np.dot((Y * (X - m_1)).T, X - m_1) / Y.sum()
+    S_0 = np.dot(((1 - Y) * (X - m_0)).T, X - m_0) / (1 - Y).sum()
+
+    S_1_inv = np.linalg.inv(S_1)
+    S_0_inv = np.linalg.inv(S_0)
+
+    # decision boundary is the conic x'Qx + beta . x + gamma = 0
+    Q = -1. / 2 * (S_1_inv - S_0_inv)
+    beta = np.dot(S_1_inv, m_1) - np.dot(S_0_inv, m_0)
+    c = np.dot(np.dot(m_1.T, S_1_inv), m_1) - np.dot(np.dot(m_0.T, S_0_inv), m_0)
+    d = np.log(p / (1 - p)) - 1. / 2 * np.log(np.linalg.det(S_1) / np.linalg.det(S_0))
+    gamma = -1. / 2 * c + d
 
-    u = (Y - np.dot(X, theta)).mean()
-    return theta, u
+    return Q, beta, gamma
 
 
-def error(Y, Yt):
-    return (Y - Yt)**2.sum()
+def error(Yt, Y):
+    """
+    Misclassification rate of predictions Yt against labels Y
+    """
+    Yt.shape = Y.shape
+    wrong = Y != Yt
+    return wrong.astype(float).sum() / Y.shape[0]
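All three fitted models reduce to an affine score beta . x + gamma and classify by whether it crosses the threshold (equivalently, p > 0.5), which is why the scripts below reuse logistic_regression_predict for the LDA fit. A minimal usage sketch of the module; the Gaussian blobs and seed are illustrative, not part of the commit:

    import numpy as np
    import classification

    # two illustrative Gaussian blobs; labels as a column vector,
    # matching what load_data returns
    rng = np.random.RandomState(0)
    X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 2])
    Y = np.vstack([np.zeros((50, 1)), np.ones((50, 1))])

    beta, gamma = classification.LDA(X, Y)
    Yp = classification.logistic_regression_predict(X, beta, gamma)
    print "LDA training error:", classification.error(Yp, Y)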

ex_1.py: +34
@@ -0,0 +1,34 @@
+import numpy as np
+from matplotlib import pyplot as plt
+
+from utils import load_data
+from classification import LDA
+
+X, Y = load_data('classificationA.train')
+
+beta, u = LDA(X, Y)
+#u = (Y - np.dot(X, beta)).mean()
+
+fig = plt.figure(1)
+colors = ['#4EACC5', '#FF9C34', '#4E9A06']
+my_members = Y == 0
+my_members.shape = (my_members.shape[0],)
+ax = fig.add_subplot(1, 1, 1)
+ax.plot(X[my_members, 0], X[my_members, 1],
+        'w', markerfacecolor=colors[0], marker='.')
+
+my_members = Y == 1
+my_members.shape = (my_members.shape[0],)
+ax.plot(X[my_members, 0], X[my_members, 1],
+        'w', markerfacecolor=colors[1], marker='.')
+
+# LDA decision boundary: beta[0] * x + beta[1] * y + u = 0
+x_beta = np.linspace(X.min(), X.max(), 100)
+y_beta = (- u - beta[0] * x_beta) / beta[1]
+ax.plot(x_beta, y_beta, color=colors[2], linewidth=1)
+plt.show()
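The plotted line is the set where the affine score vanishes, beta[0] * x1 + beta[1] * x2 + u = 0, i.e. where LDA's posterior p(y = 1 | x) crosses 0.5; solving for x2 gives the y_beta expression above. A hypothetical helper, not in the commit, that ex_1.py, ex_2.py and plot.py could all share:

    import numpy as np

    def boundary_line(beta, gamma, x_min, x_max, offset=0., n=100):
        # points of the line beta[0] * x + beta[1] * y + gamma = offset;
        # use offset=0.5 for the linear-regression rule thresholded at 0.5
        x = np.linspace(x_min, x_max, n)
        y = (offset - gamma - beta[0] * x) / beta[1]
        return x, y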

ex_2.py: +1 -26
@@ -1,7 +1,6 @@
 #
 # IRLS
 #
-# FIXME constante missing !!!!
 
 import numpy as np
 import pylab as pl
@@ -11,32 +10,8 @@
 
 verbose = True
 max_iter = 500
-
 X, Y = load_data('classificationA.train')
-W = np.ones((X.shape[0], 1))
-ridge = 1e-10
-theta = np.zeros((X.shape[1], 1))
-
-for iter in range(max_iter):
-    print iter
-    old = theta.copy()
-    h = np.dot(X, theta)
-    m = 1. / (1 + np.exp(-h))
-    W = np.diag((m * (1 - m)).flatten())
-    a = np.dot(X.T, (Y - m))
-    b = np.linalg.inv(np.dot(X.T, np.dot(W, X)))
-    theta = theta + np.dot(b, a)
-
-    if ((old - theta)**2).sum() < ridge:
-        if verbose:
-            print "got out at iteration", iter
-        break
-
-u = (Y - np.dot(X, theta)).mean()
-
 beta, u = logistic_regression(X, Y)
-# Calculate the line p(y = 1|x) = 0.5
-
 
 # Plot
 fig = pl.figure(1)
@@ -53,7 +28,7 @@
         'w', markerfacecolor=colors[1], marker = '.')
 
 x_beta = [[i] for i in np.linspace(X.min(), X.max(), 100)]
-y_beta = (0.5 - u - beta[0] * np.linspace(X.min(), X.max(), 100)) * 1 / beta[1]
+y_beta = (- u - beta[0] * np.linspace(X.min(), X.max(), 100)) / beta[1]
 ax.plot(x_beta, y_beta, color=colors[2], linewidth=1)
 pl.show()
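The last hunk drops the 0.5 offset: the logistic model crosses p(y = 1 | x) = 0.5 exactly where its logit beta . x + u is zero, since the sigmoid of a zero logit is 0.5, so no offset belongs in the line equation. Only the linear-regression rule, which thresholds the raw prediction at 0.5, keeps it (see plot.py below). A one-line check of that pivot:

    import numpy as np
    print 1. / (1 + np.exp(-0.))   # sigmoid at a zero logit: 0.5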

ex_4.py: +90
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+
+import classification
+from utils import load_data
+
+# Calculate the test errors for the three datasets
+
+XA, YA = load_data('classificationA.train')
+XB, YB = load_data('classificationB.train')
+XC, YC = load_data('classificationC.train')
+XtA, YtA = load_data('classificationA.test')
+XtB, YtB = load_data('classificationB.test')
+XtC, YtC = load_data('classificationC.test')
+
+# Dataset A
+
+print "Dataset A"
+print "****************"
+print
+
+beta, u = classification.LDA(XA, YA)
+YtcA = classification.logistic_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XA, YA, verbose=False)
+YtcA = classification.logistic_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XA, YA)
+YtcA = classification.linear_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - Linear regression: error %s" % erreur
+
+# Dataset B
+print
+print
+print "Dataset B"
+print "****************"
+print
+
+beta, u = classification.LDA(XB, YB)
+YtcB = classification.logistic_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XB, YB, verbose=False)
+YtcB = classification.logistic_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XB, YB)
+YtcB = classification.linear_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - Linear regression: error %s" % erreur
+
+# Dataset C
+print
+print
+print "Dataset C"
+print "****************"
+print
+
+beta, u = classification.LDA(XC, YC)
+YtcC = classification.logistic_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XC, YC, verbose=False)
+YtcC = classification.logistic_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XC, YC)
+YtcC = classification.linear_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - Linear regression: error %s" % erreur
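utils.load_data is imported throughout but is not part of this commit. A minimal sketch of what it plausibly does; the file layout (whitespace-separated, two feature columns followed by a 0/1 label) is an assumption, not taken from the source:

    import numpy as np

    def load_data(filename):
        # hypothetical reader: returns X of shape (n, 2) and Y of shape (n, 1)
        data = np.loadtxt(filename)
        return data[:, :2], data[:, 2:3]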

plot.py: +67
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+
+import classification
+from utils import load_data
+
+from matplotlib import pyplot as plt
+
+# Plot each training set with the three decision boundaries
+
+XA, YA = load_data('classificationA.train')
+XB, YB = load_data('classificationB.train')
+XC, YC = load_data('classificationC.train')
+XtA, YtA = load_data('classificationA.test')
+XtB, YtB = load_data('classificationB.test')
+XtC, YtC = load_data('classificationC.test')
+
+
+def plot(X, Y, Xt, title="ClassificationA.png"):
+    # Xt is currently unused: only the training data is drawn
+    fig = plt.figure()
+    colors = ['#4EACC5', '#FF9C34', '#4E9A06', '#00465F', '#7E2007']
+    my_members = Y == 0
+    my_members.shape = (my_members.shape[0],)
+    ax = fig.add_subplot(1, 1, 1)
+    ax.plot(X[my_members, 0], X[my_members, 1],
+            'w', markerfacecolor=colors[0], marker='.')
+
+    my_members = Y == 1
+    my_members.shape = (my_members.shape[0],)
+    ax.plot(X[my_members, 0], X[my_members, 1],
+            'w', markerfacecolor=colors[1], marker='.')
+
+    x_line = np.linspace(X.min(), X.max(), 100)
+
+    # LDA: boundary where beta . x + u = 0
+    beta, u = classification.LDA(X, Y)
+    ax.plot(x_line, (- u - beta[0] * x_line) / beta[1],
+            color=colors[2], linewidth=1)
+
+    # logistic regression: boundary where the logit is 0
+    beta, u = classification.logistic_regression(X, Y, verbose=False)
+    ax.plot(x_line, (- u - beta[0] * x_line) / beta[1],
+            color=colors[3], linewidth=1)
+
+    # linear regression: prediction thresholded at 0.5
+    beta, u = classification.linear_regression(X, Y)
+    ax.plot(x_line, (0.5 - u - beta[0] * x_line) / beta[1],
+            color=colors[4], linewidth=1)
+
+    labels = ('label 0', 'label 1', 'LDA model',
+              'logistic regression', 'linear regression')
+    legend = plt.legend(labels, loc=(0.9, .95), labelspacing=0.1)
+    plt.setp(legend.get_texts(), fontsize='small')
+
+    # save before show(): once the window is closed the figure is empty
+    plt.savefig(title)
+    plt.show()
+
+
+plot(XA, YA, XtA, title="classificationA.png")
+plot(XB, YB, XtB, title="classificationB.png")
+plot(XC, YC, XtC, title="classificationC.png")
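plot.py draws only the three linear boundaries. QDA's boundary is a conic, so the y_beta trick does not apply; a sketch of how it could be added, assuming the QDA above returning (Q, beta, gamma). plot_qda_boundary is a hypothetical helper, not part of the commit:

    import numpy as np
    import classification

    def plot_qda_boundary(ax, X, Y, color='#7E2007'):
        # Y must be the (n, 1) label column, as load_data provides;
        # evaluate x'Qx + beta . x + gamma on a grid, draw its zero level set
        Q, beta, gamma = classification.QDA(X, Y)
        xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 200)
        ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 200)
        xx, yy = np.meshgrid(xs, ys)
        pts = np.c_[xx.ravel(), yy.ravel()]
        z = (np.dot(pts, Q) * pts).sum(axis=1) + np.dot(pts, beta) + gamma
        ax.contour(xx, yy, z.reshape(xx.shape), [0], colors=[color])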
