CLN: some pep-8 (line length) and comment cleanup, minor refactor

josef-pkt committed Jan 29, 2019
1 parent 27efc9c commit 75cd209
Showing 5 changed files with 34 additions and 22 deletions.
29 changes: 17 additions & 12 deletions statsmodels/gam/gam.py
@@ -15,6 +15,8 @@
 from scipy import optimize
 import pandas as pd
 
+import statsmodels.base.wrapper as wrap
+
 from statsmodels.discrete.discrete_model import Logit
 from statsmodels.genmod.generalized_linear_model import (GLM, GLMResults,
                                                          GLMResultsWrapper, _check_convergence)
@@ -24,13 +26,13 @@
                                         ValueWarning)
 from statsmodels.tools.decorators import cache_readonly
 from statsmodels.tools.data import _is_using_pandas
 
-import statsmodels.base.wrapper as wrap
-
-from statsmodels.tools.linalg import matrix_sqrt
-
 from statsmodels.base._penalized import PenalizedMixin
 from statsmodels.gam.gam_penalties import MultivariateGamPenalty
+from statsmodels.tools.linalg import matrix_sqrt
 from statsmodels.gam.gam_cross_validation.gam_cross_validation import (
     MultivariateGAMCVPath)
 from statsmodels.gam.gam_cross_validation.cross_validators import KFold
 
 
 def _transform_predict_exog(model, exog, design_info=None):
@@ -214,7 +216,8 @@ def predict(self, exog=None, exog_smooth=None, transform=True, **kwargs):
         else:
             return predict_results
 
-    def get_prediction(self, exog=None, exog_smooth=None, transform=True, **kwargs):
+    def get_prediction(self, exog=None, exog_smooth=None, transform=True,
+                       **kwargs):
         """compute prediction results
 
         Parameters
@@ -234,8 +237,8 @@ def get_prediction(self, exog=None, exog_smooth=None, transform=True, **kwargs):
         -------
         prediction_results : generalized_linear_model.PredictionResults
             The prediction results instance contains prediction and prediction
-            variance and can on demand calculate confidence intervals and summary
-            tables for the prediction of the mean and of new observations.
+            variance and can on demand calculate confidence intervals and
+            summary tables for the prediction of the mean and of new
+            observations.
         """
 
         ex, exog_index = self._tranform_predict_exog(exog=exog,
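
The rewrapped signature and docstring above follow PEP 8's 79-character line limit (flake8 E501), with continuation lines aligned under the opening delimiter. A minimal sketch of that wrapping style, using a hypothetical function rather than anything from this diff:

    # Hypothetical example of the wrapping applied in this commit: break a
    # long signature after the opening parenthesis and align the
    # continuation with the first argument (avoids flake8 E501/E128).
    def get_prediction_demo(exog=None, exog_smooth=None, transform=True,
                            **kwargs):
        return exog, exog_smooth, transform, kwargs
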
@@ -445,6 +449,7 @@ def cv(self):
 class GLMGamResultsWrapper(GLMResultsWrapper):
     pass
 
+
 wrap.populate_wrapper(GLMGamResultsWrapper, GLMGamResults)
 
 
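The one line added in this hunk is blank: PEP 8 asks for two blank lines between a class (or function) body and following top-level code, which flake8 reports as E305. A toy illustration of the rule, not taken from the diff:

    class Demo:
        pass


    demo_instance = Demo()  # the two blank lines above satisfy E305
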
@@ -535,7 +540,7 @@ def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
             xnames = xnames_linear + self.smoother.col_names
 
         if is_pandas and exog_linear is not None:
-            # we a dataframe so we can get a PandasData instance fro wrapping
+            # we a dataframe so we can get a PandasData instance for wrapping
             exog = pd.DataFrame(exog, index=self.data_linear.row_labels,
                                 columns=xnames)
 
@@ -548,8 +553,8 @@ def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
         self.exog_names[:] = xnames
 
         # TODO: the generic data handling might attach the design_info from the
-        # linear part, but this is incorrect for the full model and causes
-        # problems in wald_test_terms
+        # linear part, but this is incorrect for the full model and
+        # causes problems in wald_test_terms
 
         if hasattr(self.data, 'design_info'):
             del self.data.design_info
@@ -616,7 +621,8 @@ def fit(self, start_params=None, maxiter=1000, method='PIRLS', tol=1e-8,
         else:
             if max_start_irls > 0 and (start_params is None):
                 res = self._fit_pirls(self.alpha, start_params=start_params,
-                                      maxiter=max_start_irls, tol=tol, scale=scale,
+                                      maxiter=max_start_irls, tol=tol,
+                                      scale=scale,
                                       cov_type=cov_type, cov_kwds=cov_kwds,
                                       use_t=use_t, **kwargs)
                 start_params = res.params
@@ -642,7 +648,6 @@ def _fit_pirls(self, alpha, start_params=None, maxiter=100, tol=1e-8,
         # alpha = alpha * len(y) * self.scale / 100
         # TODO: we need to rescale alpha
         endog = self.endog
-        k_exog_linear = self.k_exog_linear
         wlsexog = self.exog  # smoother.basis
         spl_s = self.penal.penalty_matrix(alpha=alpha)
 
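The deleted line in _fit_pirls assigned an attribute to a local name that was never read again; pyflakes/flake8 flags such dead assignments as F841. A contrived sketch of the pattern (names here are illustrative, not from the diff):

    def fit_demo(endog, k_exog_linear):
        k = k_exog_linear  # F841: local assigned but never used
        return sum(endog) / len(endog)

Dropping the dead assignment, as the commit does, changes no observable behavior.
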
16 changes: 12 additions & 4 deletions statsmodels/gam/gam_cross_validation/gam_cross_validation.py
@@ -47,7 +48,8 @@ def fit(self, **kwargs):
 
     @abstractmethod
     def _error(self, train_index, test_index, **kwargs):
-        # train the model on the train set and returns the error on the test set
+        # train the model on the train set
+        # and returns the error on the test set
         pass


@@ -59,7 +60,7 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
         train_der_basis = smoother.der_basis[train_index]
         train_der2_basis = smoother.der2_basis[train_index]
         train_cov_der2 = smoother.cov_der2
-        # TODO: Double check this part. cov_der2 is calculated with all the data
+        # TODO: Double check this part. cov_der2 is calculated with all data
         train_x = smoother.x[train_index]
 
         train_smoothers.append(UnivariateGenericSmoother(train_x, train_basis,
@@ -70,7 +71,7 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
         test_der_basis = smoother.der_basis[test_index]
         test_der2_basis = smoother.der2_basis[test_index]
         test_cov_der2 = smoother.cov_der2
-        # TODO: Double check this part. cov_der2 is calculated with all the data
+        # TODO: Double check this part. cov_der2 is calculated with all data
         test_x = smoother.x[test_index]
 
         test_smoothers.append(UnivariateGenericSmoother(test_x, test_basis,
@@ -87,7 +88,6 @@ def _split_train_test_smoothers(x, smoothers, train_index, test_index):
 
 class MultivariateGAMCV(BaseCV):
     def __init__(self, smoothers, alphas, gam, cost, y, cv):
-        # the gam class has already an instance
         self.cost = cost
         self.gam = gam
         self.smoothers = smoothers
@@ -113,7 +113,10 @@ def _error(self, train_index, test_index, **kwargs):
 class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
     """
     Base class for cross validation over a grid of parameters.
+    The best parameter is saved in alpha_cv
+
+    This class is currently not used
     """
 
     def __init__(self, alphas):
@@ -141,6 +144,11 @@ def plot_path(self):
 
 
 class MultivariateGAMCVPath(object):
+    """k-fold cross-validation for GAM
+
+    Warning: The API of this class is preliminary and will change.
+    """
+
     def __init__(self, smoothers, alphas, gam, cost, y, cv):
         self.cost = cost
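
The new docstring declares MultivariateGAMCVPath a k-fold cross-validation path over the penalty weights `alphas`, with the best value stored in alpha_cv (per BasePenaltiesPathCV above). A minimal sketch of plain k-fold scoring for one candidate penalty, assuming a generic fit/cost interface rather than the actual statsmodels internals (`fit_fn`, `cost_fn`, and `kfold_indices` are illustrative names):

    import numpy as np

    def kfold_indices(nobs, k_folds, seed=0):
        # shuffle the row indices and cut them into k roughly equal folds
        idx = np.random.RandomState(seed).permutation(nobs)
        return np.array_split(idx, k_folds)

    def kfold_score(fit_fn, cost_fn, x, y, k_folds=5):
        # average test-set cost over k train/test splits; fit_fn refits
        # the model on the training fold and returns a predict callable
        folds = kfold_indices(len(y), k_folds)
        errs = []
        for i, test in enumerate(folds):
            train = np.concatenate(folds[:i] + folds[i + 1:])
            predict = fit_fn(x[train], y[train])
            errs.append(cost_fn(y[test], predict(x[test])))
        return np.mean(errs)

The CV path would repeat this for every alpha on the grid and keep the minimizer.
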
6 changes: 3 additions & 3 deletions statsmodels/gam/gam_penalties.py
@@ -179,15 +179,15 @@ def __init__(self, multivariate_smoother, alpha, weights=None,
 
         # TODO: Review this,
         if weights is None:
-            # weights should hanve total length as params
-            # but it can also be scalar in individual
+            # weights should have total length as params
+            # but it can also be scalar in individual component
             self.weights = [1. for _ in range(self.k_variables)]
         else:
             import warnings
             warnings.warn('weights is currently ignored')
             self.weights = weights
 
-        self.mask = [np.array([False] * self.k_params)
+        self.mask = [np.zeros(self.k_params, dtype=np.bool_)
                      for _ in range(self.k_variables)]
         param_count = start_idx
         for i, smoother in enumerate(self.multivariate_smoother.smoothers):
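
The mask refactor above swaps a Python-list round trip for a direct boolean-array allocation; both build an all-False vector per smoother term that is later set to True over that term's parameter slice. A quick equivalence check (np.bool_ is NumPy's boolean scalar type; the slice below is illustrative):

    import numpy as np

    k_params = 10
    mask_old = np.array([False] * k_params)        # materializes a temp list
    mask_new = np.zeros(k_params, dtype=np.bool_)  # allocates directly

    assert mask_old.dtype == mask_new.dtype == np.bool_
    assert not mask_old.any() and not mask_new.any()

    # typical use: mark the parameters belonging to one smoother term
    mask_new[3:7] = True
    print(np.flatnonzero(mask_new))  # -> [3 4 5 6]
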
2 changes: 0 additions & 2 deletions statsmodels/gam/smooth_basis.py
@@ -642,7 +642,6 @@ def __init__(self, x, variable_names=None, include_intercept=False,
         else:
             self.include_intercept = include_intercept
 
-
         if variable_names is None:
             if data_names is not None:
                 self.variable_names = data_names
@@ -725,7 +724,6 @@ def __init__(self, x, df, degree, include_intercept=False,
         super(BSplines, self).__init__(x, include_intercept=include_intercept,
                                        variable_names=variable_names)
 
-
     def _make_smoothers_list(self):
         smoothers = []
         for v in range(self.k_variables):
3 changes: 2 additions & 1 deletion statsmodels/gam/tests/test_penalized.py
@@ -547,7 +547,8 @@ def test_predict(self):
         res2 = self.res2
         # this uses transform also for exog_linear
         # predicted = res1.predict(self.exog[2:4], res1.model.smoother.x[2:4])
-        predicted = res1.predict(df_autos.iloc[2:4], res1.model.smoother.x[2:4])
+        predicted = res1.predict(df_autos.iloc[2:4],
+                                 res1.model.smoother.x[2:4])
         assert_allclose(predicted, res1.fittedvalues[2:4],
                         rtol=1e-13)
         assert_allclose(predicted, res2.fitted_values[2:4],
