
Commit a0e69ab

committed
ex1, ex3 and ex4
1 parent c3ce256 commit a0e69ab

File tree

5 files changed: +266 -33 lines changed


classification.py: +74 -7
@@ -1,24 +1,29 @@
 import numpy as np
 
-from sklearn.metrics.pairwise import euclidian_distance
 
 def linear_regression(X, Y):
     """
     Fit linear regression model on X, Y
     """
     # Linear Regression
+    # append a constant column so the intercept is fitted with the slopes
+    X_2 = np.ones((X.shape[0], X.shape[1] + 1))
+    X_2[:, :2] = X
+    X = X_2
+
     a = np.linalg.inv(np.dot(X.T, X))
     b = np.dot(X.T, Y)
     beta = np.dot(a, b)
 
-    u = (Y - np.dot(X, beta)).mean()
-    return beta, u
+    # slope coefficients and intercept of the fitted affine model
+    return beta[:2], beta[2]
 
 
 def logistic_regression(X, Y, max_iter=500, ridge=1e-10, verbose=True):
     """
     Compute logistic regression
     """
+    # same augmentation: the last column of X carries the intercept
+    X_2 = np.ones((X.shape[0], X.shape[1] + 1))
+    X_2[:, :2] = X
+    X = X_2
     W = np.ones((X.shape[0], 1))
     theta = np.zeros((X.shape[1], 1))
 
@@ -35,12 +40,74 @@ def logistic_regression(X, Y, max_iter=500, ridge=1e-10, verbose=True):
             if verbose:
                 print "got out at iteration", iter
             break
+    # split theta into slope coefficients and intercept
+    beta = theta[:2]
+    u = theta[2]
+    return beta, u
+
+
+def logistic_regression_predict(X, theta, gamma):
+    """
+    Predict the label for X from the logit theta . x + gamma
+    """
+    p = 1. / (1 + np.exp(-np.dot(X, theta) - gamma))
+    Y = p > 0.5
+    return Y.astype(int)
+
+
+def linear_regression_predict(X, theta, gamma):
+    """
+    Predict the label for X with a linear regression model,
+    thresholding the raw prediction at 0.5
+    """
+    p = np.dot(X, theta) + gamma
+    Y = p > 0.5
+    return Y.astype(int)
+
+
+def LDA(X, Y):
+    n = Y.shape[0]
+    p = Y.sum() / n
+    m_1 = (Y * X).sum(axis=0) / Y.sum()
+    m_0 = ((1 - Y) * X).sum(axis=0) / (1 - Y).sum()
+
+    # pooled within-class covariance
+    a = np.dot((X - m_1).T, (Y * (X - m_1)))
+    b = np.dot((X - m_0).T, ((1 - Y) * (X - m_0)))
+    S = 1. / n * (a + b)
+
+    S_inv = np.linalg.inv(S)
+
+    beta = np.dot(S_inv, (m_1 - m_0))
+
+    # intercept: -1/2 (m_1 + m_0)' S^-1 (m_1 - m_0) + log-odds of the prior
+    c = np.dot(np.dot((m_1 + m_0).T, S_inv), m_1 - m_0)
+    d = np.log(p / (1 - p))
+    gamma = -1. / 2 * c + d
+    return beta, gamma
+
+
+def QDA(X, Y):
+    n = Y.shape[0]
+    p = Y.sum() / n
+    m_1 = (Y * X).sum(axis=0) / Y.sum()
+    m_0 = ((1 - Y) * X).sum(axis=0) / (1 - Y).sum()
+
+    # per-class covariance matrices (LDA's pooled S becomes S_1 and S_0)
+    S_1 = np.dot((Y * (X - m_1)).T, X - m_1) / Y.sum()
+    S_0 = np.dot(((1 - Y) * (X - m_0)).T, X - m_0) / (1 - Y).sum()
+
+    S_1_inv = np.linalg.inv(S_1)
+    S_0_inv = np.linalg.inv(S_0)
+
+    # decision boundary is the conic x'Qx + beta . x + gamma = 0
+    Q = -1. / 2 * (S_1_inv - S_0_inv)
+    beta = np.dot(S_1_inv, m_1) - np.dot(S_0_inv, m_0)
+    c = np.dot(np.dot(m_1.T, S_1_inv), m_1) - np.dot(np.dot(m_0.T, S_0_inv), m_0)
+    d = np.log(p / (1 - p)) - 1. / 2 * np.log(np.linalg.det(S_1) / np.linalg.det(S_0))
+    gamma = -1. / 2 * c + d
 
-    u = (Y - np.dot(X, theta)).mean()
-    return theta, u
+    return Q, beta, gamma
 
 
-def error(Y, Yt):
-    return (Y - Yt)**2.sum()
+def error(Yt, Y):
+    """
+    Misclassification rate of predictions Yt against labels Y
+    """
+    Yt.shape = Y.shape
+    wrong = Y != Yt
+    return wrong.astype(float).sum() / Y.shape[0]
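All three fitted models reduce to an affine score beta . x + gamma and classify by whether it crosses the threshold (equivalently, p > 0.5), which is why the scripts below reuse logistic_regression_predict for the LDA fit. A minimal usage sketch of the module; the Gaussian blobs and seed are illustrative, not part of the commit:

    import numpy as np
    import classification

    # two illustrative Gaussian blobs; labels as a column vector,
    # matching what load_data returns
    rng = np.random.RandomState(0)
    X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 2])
    Y = np.vstack([np.zeros((50, 1)), np.ones((50, 1))])

    beta, gamma = classification.LDA(X, Y)
    Yp = classification.logistic_regression_predict(X, beta, gamma)
    print "LDA training error:", classification.error(Yp, Y)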

ex_1.py: +34
@@ -0,0 +1,34 @@
+import numpy as np
+from matplotlib import pyplot as plt
+
+from utils import load_data
+from classification import LDA
+
+X, Y = load_data('classificationA.train')
+
+beta, u = LDA(X, Y)
+#u = (Y - np.dot(X, beta)).mean()
+
+fig = plt.figure(1)
+colors = ['#4EACC5', '#FF9C34', '#4E9A06']
+my_members = Y == 0
+my_members.shape = (my_members.shape[0],)
+ax = fig.add_subplot(1, 1, 1)
+ax.plot(X[my_members, 0], X[my_members, 1],
+        'w', markerfacecolor=colors[0], marker='.')
+
+my_members = Y == 1
+my_members.shape = (my_members.shape[0],)
+ax.plot(X[my_members, 0], X[my_members, 1],
+        'w', markerfacecolor=colors[1], marker='.')
+
+# LDA decision boundary: beta[0] * x + beta[1] * y + u = 0
+x_beta = np.linspace(X.min(), X.max(), 100)
+y_beta = (- u - beta[0] * x_beta) / beta[1]
+ax.plot(x_beta, y_beta, color=colors[2], linewidth=1)
+plt.show()
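The plotted line is the set where the affine score vanishes, beta[0] * x1 + beta[1] * x2 + u = 0, i.e. where LDA's posterior p(y = 1 | x) crosses 0.5; solving for x2 gives the y_beta expression above. A hypothetical helper, not in the commit, that ex_1.py, ex_2.py and plot.py could all share:

    import numpy as np

    def boundary_line(beta, gamma, x_min, x_max, offset=0., n=100):
        # points of the line beta[0] * x + beta[1] * y + gamma = offset;
        # use offset=0.5 for the linear-regression rule thresholded at 0.5
        x = np.linspace(x_min, x_max, n)
        y = (offset - gamma - beta[0] * x) / beta[1]
        return x, y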

ex_2.py: +1 -26
@@ -1,7 +1,6 @@
 #
 # IRLS
 #
-# FIXME constante missing !!!!
 
 import numpy as np
 import pylab as pl
@@ -11,32 +10,8 @@
 
 verbose = True
 max_iter = 500
-
 X, Y = load_data('classificationA.train')
-W = np.ones((X.shape[0], 1))
-ridge = 1e-10
-theta = np.zeros((X.shape[1], 1))
-
-for iter in range(max_iter):
-    print iter
-    old = theta.copy()
-    h = np.dot(X, theta)
-    m = 1. / (1 + np.exp(-h))
-    W = np.diag((m * (1 - m)).flatten())
-    a = np.dot(X.T, (Y - m))
-    b = np.linalg.inv(np.dot(X.T, np.dot(W, X)))
-    theta = theta + np.dot(b, a)
-
-    if ((old - theta)**2).sum() < ridge:
-        if verbose:
-            print "got out at iteration", iter
-        break
-
-u = (Y - np.dot(X, theta)).mean()
-
 beta, u = logistic_regression(X, Y)
-# Calculate the line p(y = 1|x) = 0.5
-
 
 # Plot
 fig = pl.figure(1)
@@ -53,7 +28,7 @@
         'w', markerfacecolor=colors[1], marker = '.')
 
 x_beta = [[i] for i in np.linspace(X.min(), X.max(), 100)]
-y_beta = (0.5 - u - beta[0] * np.linspace(X.min(), X.max(), 100)) * 1 / beta[1]
+y_beta = (- u - beta[0] * np.linspace(X.min(), X.max(), 100)) / beta[1]
 ax.plot(x_beta, y_beta, color=colors[2], linewidth=1)
 pl.show()
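The last hunk drops the 0.5 offset: the logistic model crosses p(y = 1 | x) = 0.5 exactly where its logit beta . x + u is zero, since the sigmoid of a zero logit is 0.5, so no offset belongs in the line equation. Only the linear-regression rule, which thresholds the raw prediction at 0.5, keeps it (see plot.py below). A one-line check of that pivot:

    import numpy as np
    print 1. / (1 + np.exp(-0.))   # sigmoid at a zero logit: 0.5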

ex_4.py: +90
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+
+import classification
+from utils import load_data
+
+# Calculate the test errors for the three datasets
+
+XA, YA = load_data('classificationA.train')
+XB, YB = load_data('classificationB.train')
+XC, YC = load_data('classificationC.train')
+XtA, YtA = load_data('classificationA.test')
+XtB, YtB = load_data('classificationB.test')
+XtC, YtC = load_data('classificationC.test')
+
+# Dataset A
+
+print "Dataset A"
+print "****************"
+print
+
+beta, u = classification.LDA(XA, YA)
+YtcA = classification.logistic_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XA, YA, verbose=False)
+YtcA = classification.logistic_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XA, YA)
+YtcA = classification.linear_regression_predict(XtA, beta, u)
+erreur = classification.error(YtcA, YtA)
+
+print "Test set A - Linear regression: error %s" % erreur
+
+# Dataset B
+print
+print
+print "Dataset B"
+print "****************"
+print
+
+beta, u = classification.LDA(XB, YB)
+YtcB = classification.logistic_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XB, YB, verbose=False)
+YtcB = classification.logistic_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XB, YB)
+YtcB = classification.linear_regression_predict(XtB, beta, u)
+erreur = classification.error(YtcB, YtB)
+
+print "Test set B - Linear regression: error %s" % erreur
+
+# Dataset C
+print
+print
+print "Dataset C"
+print "****************"
+print
+
+beta, u = classification.LDA(XC, YC)
+YtcC = classification.logistic_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - LDA model: error %s" % erreur
+
+beta, u = classification.logistic_regression(XC, YC, verbose=False)
+YtcC = classification.logistic_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - Logistic regression: error %s" % erreur
+
+beta, u = classification.linear_regression(XC, YC)
+YtcC = classification.linear_regression_predict(XtC, beta, u)
+erreur = classification.error(YtcC, YtC)
+
+print "Test set C - Linear regression: error %s" % erreur
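utils.load_data is imported throughout but is not part of this commit. A minimal sketch of what it plausibly does; the file layout (whitespace-separated, two feature columns followed by a 0/1 label) is an assumption, not taken from the source:

    import numpy as np

    def load_data(filename):
        # hypothetical reader: returns X of shape (n, 2) and Y of shape (n, 1)
        data = np.loadtxt(filename)
        return data[:, :2], data[:, 2:3]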

plot.py: +67
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+
+import classification
+from utils import load_data
+
+from matplotlib import pyplot as plt
+
+# Plot each training set with the three decision boundaries
+
+XA, YA = load_data('classificationA.train')
+XB, YB = load_data('classificationB.train')
+XC, YC = load_data('classificationC.train')
+XtA, YtA = load_data('classificationA.test')
+XtB, YtB = load_data('classificationB.test')
+XtC, YtC = load_data('classificationC.test')
+
+
+def plot(X, Y, Xt, title="ClassificationA.png"):
+    # Xt is currently unused: only the training data is drawn
+    fig = plt.figure()
+    colors = ['#4EACC5', '#FF9C34', '#4E9A06', '#00465F', '#7E2007']
+    my_members = Y == 0
+    my_members.shape = (my_members.shape[0],)
+    ax = fig.add_subplot(1, 1, 1)
+    ax.plot(X[my_members, 0], X[my_members, 1],
+            'w', markerfacecolor=colors[0], marker='.')
+
+    my_members = Y == 1
+    my_members.shape = (my_members.shape[0],)
+    ax.plot(X[my_members, 0], X[my_members, 1],
+            'w', markerfacecolor=colors[1], marker='.')
+
+    x_line = np.linspace(X.min(), X.max(), 100)
+
+    # LDA: boundary where beta . x + u = 0
+    beta, u = classification.LDA(X, Y)
+    ax.plot(x_line, (- u - beta[0] * x_line) / beta[1],
+            color=colors[2], linewidth=1)
+
+    # logistic regression: boundary where the logit is 0
+    beta, u = classification.logistic_regression(X, Y, verbose=False)
+    ax.plot(x_line, (- u - beta[0] * x_line) / beta[1],
+            color=colors[3], linewidth=1)
+
+    # linear regression: prediction thresholded at 0.5
+    beta, u = classification.linear_regression(X, Y)
+    ax.plot(x_line, (0.5 - u - beta[0] * x_line) / beta[1],
+            color=colors[4], linewidth=1)
+
+    labels = ('label 0', 'label 1', 'LDA model',
+              'logistic regression', 'linear regression')
+    legend = plt.legend(labels, loc=(0.9, .95), labelspacing=0.1)
+    plt.setp(legend.get_texts(), fontsize='small')
+
+    # save before show(): once the window is closed the figure is empty
+    plt.savefig(title)
+    plt.show()
+
+
+plot(XA, YA, XtA, title="classificationA.png")
+plot(XB, YB, XtB, title="classificationB.png")
+plot(XC, YC, XtC, title="classificationC.png")
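plot.py draws only the three linear boundaries. QDA's boundary is a conic, so the y_beta trick does not apply; a sketch of how it could be added, assuming the QDA above returning (Q, beta, gamma). plot_qda_boundary is a hypothetical helper, not part of the commit:

    import numpy as np
    import classification

    def plot_qda_boundary(ax, X, Y, color='#7E2007'):
        # Y must be the (n, 1) label column, as load_data provides;
        # evaluate x'Qx + beta . x + gamma on a grid, draw its zero level set
        Q, beta, gamma = classification.QDA(X, Y)
        xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 200)
        ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 200)
        xx, yy = np.meshgrid(xs, ys)
        pts = np.c_[xx.ravel(), yy.ravel()]
        z = (np.dot(pts, Q) * pts).sum(axis=1) + np.dot(pts, beta) + gamma
        ax.contour(xx, yy, z.reshape(xx.shape), [0], colors=[color])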
