CLN: some pep-8 (line length) and comment cleanup, minor refactor

josef-pkt committed Jan 29, 2019
1 parent 27efc9c commit 75cd209
Showing 5 changed files with 34 additions and 22 deletions.
29 changes: 17 additions & 12 deletions statsmodels/gam/gam.py
@@ -15,6 +15,8 @@
 from scipy import optimize
 import pandas as pd
 
+import statsmodels.base.wrapper as wrap
+
 from statsmodels.discrete.discrete_model import Logit
 from statsmodels.genmod.generalized_linear_model import (GLM, GLMResults,
                                                          GLMResultsWrapper, _check_convergence)
@@ -24,13 +26,13 @@
                                         ValueWarning)
 from statsmodels.tools.decorators import cache_readonly
 from statsmodels.tools.data import _is_using_pandas
 
-import statsmodels.base.wrapper as wrap
-
-from statsmodels.tools.linalg import matrix_sqrt
-
 from statsmodels.base._penalized import PenalizedMixin
 from statsmodels.gam.gam_penalties import MultivariateGamPenalty
+from statsmodels.tools.linalg import matrix_sqrt
 from statsmodels.gam.gam_cross_validation.gam_cross_validation import (
     MultivariateGAMCVPath)
 from statsmodels.gam.gam_cross_validation.cross_validators import KFold
 
 
 def _transform_predict_exog(model, exog, design_info=None):
@@ -214,7 +216,8 @@ def predict(self, exog=None, exog_smooth=None, transform=True, **kwargs):
         else:
             return predict_results
 
-    def get_prediction(self, exog=None, exog_smooth=None, transform=True, **kwargs):
+    def get_prediction(self, exog=None, exog_smooth=None, transform=True,
+                       **kwargs):
         """compute prediction results
 
         Parameters
@@ -234,8 +237,8 @@ def get_prediction(self, exog=None, exog_smooth=None, transform=True, **kwargs):
         -------
         prediction_results : generalized_linear_model.PredictionResults
             The prediction results instance contains prediction and prediction
-            variance and can on demand calculate confidence intervals and summary
-            tables for the prediction of the mean and of new observations.
+            variance and can on demand calculate confidence intervals and
+            summary tables for the prediction of the mean and of new
+            observations.
         """
 
         ex, exog_index = self._tranform_predict_exog(exog=exog,
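
The rewrapped signature and docstring above follow PEP 8's 79-character line limit (flake8 E501), with continuation lines aligned under the opening delimiter. A minimal sketch of that wrapping style, using a hypothetical function rather than anything from this diff:

    # Hypothetical example of the wrapping applied in this commit: break a
    # long signature after the opening parenthesis and align the
    # continuation with the first argument (avoids flake8 E501/E128).
    def get_prediction_demo(exog=None, exog_smooth=None, transform=True,
                            **kwargs):
        return exog, exog_smooth, transform, kwargs
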
@@ -445,6 +449,7 @@ def cv(self):
 class GLMGamResultsWrapper(GLMResultsWrapper):
     pass
 
+
 wrap.populate_wrapper(GLMGamResultsWrapper, GLMGamResults)
 
 
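The one line added in this hunk is blank: PEP 8 asks for two blank lines between a class (or function) body and following top-level code, which flake8 reports as E305. A toy illustration of the rule, not taken from the diff:

    class Demo:
        pass


    demo_instance = Demo()  # the two blank lines above satisfy E305
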
@@ -535,7 +540,7 @@ def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
             xnames = xnames_linear + self.smoother.col_names
 
         if is_pandas and exog_linear is not None:
-            # we a dataframe so we can get a PandasData instance fro wrapping
+            # we a dataframe so we can get a PandasData instance for wrapping
             exog = pd.DataFrame(exog, index=self.data_linear.row_labels,
                                 columns=xnames)
 
@@ -548,8 +553,8 @@ def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
         self.exog_names[:] = xnames
 
         # TODO: the generic data handling might attach the design_info from the
-        # linear part, but this is incorrect for the full model and causes
-        # problems in wald_test_terms
+        # linear part, but this is incorrect for the full model and
+        # causes problems in wald_test_terms
 
         if hasattr(self.data, 'design_info'):
             del self.data.design_info
@@ -616,7 +621,8 @@ def fit(self, start_params=None, maxiter=1000, method='PIRLS', tol=1e-8,
         else:
             if max_start_irls > 0 and (start_params is None):
                 res = self._fit_pirls(self.alpha, start_params=start_params,
-                                      maxiter=max_start_irls, tol=tol, scale=scale,
+                                      maxiter=max_start_irls, tol=tol,
+                                      scale=scale,
                                       cov_type=cov_type, cov_kwds=cov_kwds,
                                       use_t=use_t, **kwargs)
                 start_params = res.params
@@ -642,7 +648,6 @@ def _fit_pirls(self, alpha, start_params=None, maxiter=100, tol=1e-8,
         # alpha = alpha * len(y) * self.scale / 100
         # TODO: we need to rescale alpha
         endog = self.endog
-        k_exog_linear = self.k_exog_linear
         wlsexog = self.exog  # smoother.basis
         spl_s = self.penal.penalty_matrix(alpha=alpha)
 
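The deleted line in _fit_pirls assigned an attribute to a local name that was never read again; pyflakes/flake8 flags such dead assignments as F841. A contrived sketch of the pattern (names here are illustrative, not from the diff):

    def fit_demo(endog, k_exog_linear):
        k = k_exog_linear  # F841: local assigned but never used
        return sum(endog) / len(endog)

Dropping the dead assignment, as the commit does, changes no observable behavior.
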
16 changes: 12 additions & 4 deletions statsmodels/gam/gam_cross_validation/gam_cross_validation.py
@@ -47,7 +48,8 @@ def fit(self, **kwargs):
 
     @abstractmethod
     def _error(self, train_index, test_index, **kwargs):
-        # train the model on the train set and returns the error on the test set
+        # train the model on the train set
+        # and returns the error on the test set
         pass


@@ -59,7 +60,7 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
         train_der_basis = smoother.der_basis[train_index]
         train_der2_basis = smoother.der2_basis[train_index]
         train_cov_der2 = smoother.cov_der2
-        # TODO: Double check this part. cov_der2 is calculated with all the data
+        # TODO: Double check this part. cov_der2 is calculated with all data
         train_x = smoother.x[train_index]
 
         train_smoothers.append(UnivariateGenericSmoother(train_x, train_basis,
@@ -70,7 +71,7 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
         test_der_basis = smoother.der_basis[test_index]
         test_der2_basis = smoother.der2_basis[test_index]
         test_cov_der2 = smoother.cov_der2
-        # TODO: Double check this part. cov_der2 is calculated with all the data
+        # TODO: Double check this part. cov_der2 is calculated with all data
         test_x = smoother.x[test_index]
 
         test_smoothers.append(UnivariateGenericSmoother(test_x, test_basis,
@@ -87,7 +88,6 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
 
 class MultivariateGAMCV(BaseCV):
     def __init__(self, smoothers, alphas, gam, cost, y, cv):
-        # the gam class has already an instance
         self.cost = cost
         self.gam = gam
         self.smoothers = smoothers
@@ -113,7 +113,10 @@ def _error(self, train_index, test_index, **kwargs):
 class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
     """
     Base class for cross validation over a grid of parameters.
+    The best parameter is saved in alpha_cv
+
+    This class is currently not used
     """
 
     def __init__(self, alphas):
@@ -141,6 +144,11 @@ def plot_path(self):
 
 
 class MultivariateGAMCVPath(object):
+    """k-fold cross-validation for GAM
+
+    Warning: The API of this class is preliminary and will change.
+    """
+
     def __init__(self, smoothers, alphas, gam, cost, y, cv):
         self.cost = cost
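
The new docstring declares MultivariateGAMCVPath a k-fold cross-validation path over the penalty weights `alphas`, with the best value stored in alpha_cv (per BasePenaltiesPathCV above). A minimal sketch of plain k-fold scoring for one candidate penalty, assuming a generic fit/cost interface rather than the actual statsmodels internals (`fit_fn`, `cost_fn`, and `kfold_indices` are illustrative names):

    import numpy as np

    def kfold_indices(nobs, k_folds, seed=0):
        # shuffle the row indices and cut them into k roughly equal folds
        idx = np.random.RandomState(seed).permutation(nobs)
        return np.array_split(idx, k_folds)

    def kfold_score(fit_fn, cost_fn, x, y, k_folds=5):
        # average test-set cost over k train/test splits; fit_fn refits
        # the model on the training fold and returns a predict callable
        folds = kfold_indices(len(y), k_folds)
        errs = []
        for i, test in enumerate(folds):
            train = np.concatenate(folds[:i] + folds[i + 1:])
            predict = fit_fn(x[train], y[train])
            errs.append(cost_fn(y[test], predict(x[test])))
        return np.mean(errs)

The CV path would repeat this for every alpha on the grid and keep the minimizer.
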
6 changes: 3 additions & 3 deletions statsmodels/gam/gam_penalties.py
@@ -179,15 +179,15 @@ def __init__(self, multivariate_smoother, alpha, weights=None,
 
         # TODO: Review this,
         if weights is None:
-            # weights should hanve total length as params
-            # but it can also be scalar in individual
+            # weights should have total length as params
+            # but it can also be scalar in individual component
             self.weights = [1. for _ in range(self.k_variables)]
         else:
             import warnings
             warnings.warn('weights is currently ignored')
             self.weights = weights
 
-        self.mask = [np.array([False] * self.k_params)
+        self.mask = [np.zeros(self.k_params, dtype=np.bool_)
                      for _ in range(self.k_variables)]
         param_count = start_idx
         for i, smoother in enumerate(self.multivariate_smoother.smoothers):
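
The mask refactor above swaps a Python-list round trip for a direct boolean-array allocation; both build an all-False vector per smoother term that is later set to True over that term's parameter slice. A quick equivalence check (np.bool_ is NumPy's boolean scalar type; the slice below is illustrative):

    import numpy as np

    k_params = 10
    mask_old = np.array([False] * k_params)        # materializes a temp list
    mask_new = np.zeros(k_params, dtype=np.bool_)  # allocates directly

    assert mask_old.dtype == mask_new.dtype == np.bool_
    assert not mask_old.any() and not mask_new.any()

    # typical use: mark the parameters belonging to one smoother term
    mask_new[3:7] = True
    print(np.flatnonzero(mask_new))  # -> [3 4 5 6]
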
2 changes: 0 additions & 2 deletions statsmodels/gam/smooth_basis.py
@@ -642,7 +642,6 @@ def __init__(self, x, variable_names=None, include_intercept=False,
         else:
             self.include_intercept = include_intercept
 
-
         if variable_names is None:
             if data_names is not None:
                 self.variable_names = data_names
@@ -725,7 +724,6 @@ def __init__(self, x, df, degree, include_intercept=False,
         super(BSplines, self).__init__(x, include_intercept=include_intercept,
                                        variable_names=variable_names)
 
-
     def _make_smoothers_list(self):
         smoothers = []
         for v in range(self.k_variables):
3 changes: 2 additions & 1 deletion statsmodels/gam/tests/test_penalized.py
@@ -547,7 +547,8 @@ def test_predict(self):
         res2 = self.res2
         # this uses transform also for exog_linear
         # predicted = res1.predict(self.exog[2:4], res1.model.smoother.x[2:4])
-        predicted = res1.predict(df_autos.iloc[2:4], res1.model.smoother.x[2:4])
+        predicted = res1.predict(df_autos.iloc[2:4],
+                                 res1.model.smoother.x[2:4])
         assert_allclose(predicted, res1.fittedvalues[2:4],
                         rtol=1e-13)
         assert_allclose(predicted, res2.fitted_values[2:4],
