Skip to content

Commit

Permalink
DOC Improve SGDClassifier docstrings (scikit-learn#15470)
Browse files Browse the repository at this point in the history
  • Loading branch information
poorna-kumar authored and rth committed Nov 13, 2019
1 parent ac7081c commit 9a00d72
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 19 deletions.
12 changes: 10 additions & 2 deletions maint_tools/test_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,21 @@
"RidgeClassifier.decision_function",
"RidgeClassifier.fit",
"RidgeClassifierCV.decision_function",
"SGDClassifier.decision_function",
"KernelDensity",
"KernelDensity.fit",
"KernelDensity.score",
"DecisionTreeClassifier",
"DecisionTreeRegressor",
"LinearRegression$"
"LinearRegression$",
"SGDClassifier.decision_function",
"SGDClassifier.set_params",
"SGDClassifier.get_params",
"SGDClassifier.fit",
"SGDClassifier.partial_fit",
"SGDClassifier.predict$", # $ to avoid match w/ predict_proba (regex)
"SGDClassifier.score",
"SGDClassifier.sparsify",
"SGDClassifier.densify",
]


Expand Down
53 changes: 36 additions & 17 deletions sklearn/linear_model/_stochastic_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@ def __init__(self, loss, penalty='l2', alpha=0.0001, C=1.0,
self._validate_params()

def set_params(self, **kwargs):
    """Set and validate the parameters of the estimator.

    Parameters
    ----------
    **kwargs : dict
        Estimator parameters.

    Returns
    -------
    self : object
        Estimator instance.
    """
    super().set_params(**kwargs)
    # Re-validate hyperparameters after they have been updated, so that an
    # invalid combination fails immediately rather than at fit time.
    self._validate_params()
    return self
Expand Down Expand Up @@ -627,10 +639,10 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Subset of the training data
Subset of the training data.
y : numpy array, shape (n_samples,)
Subset of the target values
Subset of the target values.
classes : array, shape (n_classes,)
Classes across all calls to partial_fit.
Expand All @@ -646,7 +658,8 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
Returns
-------
self : returns an instance of self.
self :
Returns an instance of self.
"""
self._validate_params(for_partial_fit=True)
if self.class_weight in ['balanced']:
Expand All @@ -670,10 +683,10 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data
Training data.
y : numpy array, shape (n_samples,)
Target values
Target values.
coef_init : array, shape (n_classes, n_features)
The initial coefficients to warm-start the optimization.
Expand All @@ -685,11 +698,12 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
Weights applied to individual samples.
If not provided, uniform weights are assumed. These weights will
be multiplied with class_weight (passed through the
constructor) if class_weight is specified
constructor) if class_weight is specified.
Returns
-------
self : returns an instance of self.
self :
Returns an instance of self.
"""
return self._fit(X, y, alpha=self.alpha, C=1.0,
loss=self.loss, learning_rate=self.learning_rate,
Expand Down Expand Up @@ -777,8 +791,8 @@ class SGDClassifier(BaseSGDClassifier):
Whether or not the training data should be shuffled after each epoch.
Defaults to True.
verbose : integer, default=0
The verbosity level
verbose : int, default=0
The verbosity level.
epsilon : float, default=0.1
Epsilon in the epsilon-insensitive loss functions; only if `loss` is
Expand All @@ -802,7 +816,7 @@ class SGDClassifier(BaseSGDClassifier):
generator; If None, the random number generator is the RandomState
instance used by `np.random`.
learning_rate : string, optional
learning_rate : str, optional
The learning rate schedule:
'constant':
Expand Down Expand Up @@ -855,7 +869,7 @@ class SGDClassifier(BaseSGDClassifier):
The "balanced" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``
as ``n_samples / (n_classes * np.bincount(y))``.
warm_start : bool, default=False
When set to True, reuse the solution of the previous call to fit as
Expand Down Expand Up @@ -898,6 +912,14 @@ class SGDClassifier(BaseSGDClassifier):
Number of weight updates performed during training.
Same as ``(n_iter_ * n_samples)``.
See Also
--------
sklearn.svm.LinearSVC: Linear support vector classification.
LogisticRegression: Logistic regression.
Perceptron: Inherits from SGDClassifier. ``Perceptron()`` is equivalent to
``SGDClassifier(loss="perceptron", eta0=1, learning_rate="constant",
penalty=None)``.
Examples
--------
>>> import numpy as np
Expand All @@ -910,11 +932,6 @@ class SGDClassifier(BaseSGDClassifier):
>>> print(clf.predict([[-0.8, -1]]))
[1]
See also
--------
sklearn.svm.LinearSVC, LogisticRegression, Perceptron
"""

def __init__(self, loss="hinge", penalty='l2', alpha=0.0001, l1_ratio=0.15,
Expand Down Expand Up @@ -958,6 +975,7 @@ def predict_proba(self):
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Input data for prediction.
Returns
-------
Expand Down Expand Up @@ -1034,7 +1052,8 @@ def predict_log_proba(self):
Parameters
----------
X : array-like, shape (n_samples, n_features)
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input data for prediction.
Returns
-------
Expand Down

0 comments on commit 9a00d72

Please sign in to comment.