Update Score Prediction by Multi Linear Regression...!
KasunHewagama committed Oct 8, 2020
1 parent 8074a88 commit 51efa82
Showing 1 changed file with 28 additions and 19 deletions.
models/Score_Prediction_by_GS_MLR.py: 47 changes (28 additions, 19 deletions)
@@ -7,18 +7,15 @@
from scipy import stats
import matplotlib.pyplot as plt
from dataset.cleanDs import cleanDs

from sklearn.model_selection import train_test_split

cleanData = cleanDs()
df = cleanData.clean_db()
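# Optional quick look at the cleaned data (a sketch; these are the columns used further down):
print(df[['User_Score', 'Global_Sales', 'Critic_Score', 'Critic_Count']].describe())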

# Creating plot with Global_Sales & User_Score
x = df['Global_Sales']
y = df['User_Score']

# plt.xlim(0,20)
# plt.ylim(0,12)

slope, intercept, r, p, std_err = stats.linregress(x, y)


@@ -38,12 +35,10 @@ def myfunc(x):

print(numpy.corrcoef(x, y))
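# Optional check (sketch): the r from stats.linregress is the same Pearson correlation
# that numpy.corrcoef reports off the diagonal; r**2 is the share of User_Score
# variance explained by Global_Sales alone.
print('r =', round(r, 3), ' r^2 =', round(r ** 2, 3))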

# Creating plot with Critic_Score & User_Score
x = df['Critic_Score']
y = df['User_Score']

# plt.xlim(0,20)
# plt.ylim(0,12)

slope, intercept, r, p, std_err = stats.linregress(x, y)


@@ -63,46 +58,50 @@ def myfunc(x):

print(numpy.corrcoef(x, y))
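# Optional check (sketch): p tests whether the Critic_Score slope differs from zero,
# and std_err is the standard error of that slope estimate.
print('slope =', round(slope, 3), ' p-value =', p, ' std_err =', round(std_err, 3))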


# Creating plot with Critic_Count & User_Score
x = df['Critic_Count']
y = df['User_Score']

#plt.xlim(0,5)
#plt.ylim(0,1000)

slope, intercept, r, p, std_err = stats.linregress(x, y)


def myfunc(x):
    return slope * x + intercept


mymodel = list(map(myfunc, x))
plt.scatter(x, y, color='green')
plt.title('Critic_Count Vs User_Score', fontsize=14)
plt.xlabel('Critic_Count', fontsize=14)
plt.ylabel('User_Score', fontsize=14)
plt.grid(True)
plt.plot(x, mymodel)
plt.show()
print(numpy.corrcoef(x, y))
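# Before the multiple regression, a correlation matrix over all candidate predictors
# can be a useful summary (sketch; uses only columns already referenced above):
print(df[['User_Score', 'Global_Sales', 'Critic_Score', 'Critic_Count']].corr())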



import pandas as pd
from sklearn import linear_model
import statsmodels.api as sm

# df = pd.df(Stock_Market,columns=['Year','Month','Interest_Rate','Unemployment_Rate','Stock_Index_Price'])

# Predictor columns for the multiple regression; User_Score is the target.
X = df[['Critic_Score', 'Global_Sales', 'Critic_Count']]
Y = df['User_Score']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# with sklearn
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)  # fit on the training split so the test-set evaluation below is not done on training data
y_pred = regr.predict(X_test)

print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

print(pd.DataFrame({'Actual': Y_test, 'Predicted': y_pred}))
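# Readability aid (sketch): regr.coef_ follows the column order of X, so pairing each
# coefficient with its column name avoids misreading them.
for name, coef in zip(X.columns, regr.coef_):
    print(name, ':', round(coef, 4))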

# prediction with sklearn
Global_Sales = 23.21
Critic_Score = 91
@@ -111,10 +110,20 @@ def myfunc(x):
print('Predicted User Score: \n', regr.predict([[Critic_Score, Global_Sales, Critic_Count]]))  # values must follow the column order of X
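# Alternative (sketch): passing the query as a one-row DataFrame with the same column
# names as X sidesteps any ambiguity about feature order.
query = pd.DataFrame([[Critic_Score, Global_Sales, Critic_Count]], columns=X.columns)
print('Predicted User Score:', regr.predict(query))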

# with statsmodels
X = sm.add_constant(X)  # statsmodels OLS needs an explicit intercept column

model = sm.OLS(Y, X).fit()
predictions = model.predict(X)

print_model = model.summary()
print(print_model)
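# A few headline numbers can also be read straight off the fitted results object
# (sketch; rsquared and params are standard statsmodels attributes):
print('OLS R-squared:', model.rsquared)
print('OLS coefficients:\n', model.params)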

# Evaluation metrics on the held-out test set
from sklearn import metrics  # avoid rebinding "sm", which already names statsmodels above

print("Mean absolute error =", round(metrics.mean_absolute_error(Y_test, y_pred), 2))
print("Mean squared error =", round(metrics.mean_squared_error(Y_test, y_pred), 2))
print("Median absolute error =", round(metrics.median_absolute_error(Y_test, y_pred), 2))
print("Explained variance score =", round(metrics.explained_variance_score(Y_test, y_pred), 2))
print("R2 score =", round(metrics.r2_score(Y_test, y_pred), 2))

print(regr.score(X_test, Y_test))  # same R2 on the test split, computed directly by the fitted model
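# Optionally, RMSE in the same units as User_Score (sketch; relies on the metrics import above):
print("Root mean squared error =", round(metrics.mean_squared_error(Y_test, y_pred) ** 0.5, 2))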
