Skip to content

Commit

Permalink
MNT Make modules private in sklearn.datasets (scikit-learn#15307)
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasjpfan authored and NicolasHug committed Oct 27, 2019
1 parent 0b284ff commit 4a95e33
Show file tree
Hide file tree
Showing 64 changed files with 252 additions and 159 deletions.
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ sklearn/svm/libsvm.py
sklearn/svm/libsvm_sparse.py
sklearn/svm/liblinear.py

sklearn/datasets/base.py
sklearn/datasets/california_housing.py
sklearn/datasets/covtype.py
sklearn/datasets/kddcup99.py
sklearn/datasets/lfw.py
sklearn/datasets/olivetti_faces.py
sklearn/datasets/openml.py
sklearn/datasets/rcv1.py
sklearn/datasets/samples_generator.py
sklearn/datasets/species_distributions.py
sklearn/datasets/svmlight_format.py
sklearn/datasets/twenty_newsgroups.py

sklearn/feature_extraction/dict_vectorizer.py
sklearn/feature_extraction/hashing.py
sklearn/feature_extraction/stop_words.py
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_glmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import gc
from time import time
from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression

alpha = 0.1
# alpha = 0.01
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_lasso.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from time import time
import numpy as np

from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression


def compute_bench(alpha, n_samples, n_features, precompute):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_lasso_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from sklearn.linear_model import lars_path, lars_path_gram
from sklearn.linear_model import lasso_path
from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression


def compute_bench(samples_range, features_range):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_omp_lars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np

from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp
from sklearn.datasets.samples_generator import make_sparse_coded_signal
from sklearn.datasets import make_sparse_coded_signal


def compute_bench(samples_range, features_range):
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/bench_plot_randomized_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@
from sklearn.utils import gen_batches
from sklearn.utils.validation import check_random_state
from sklearn.utils.extmath import randomized_svd
from sklearn.datasets.samples_generator import (make_low_rank_matrix,
make_sparse_uncorrelated)
from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated
from sklearn.datasets import (fetch_lfw_people,
fetch_openml,
fetch_20newsgroups_vectorized,
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from scipy.linalg import svd
from sklearn.utils.extmath import randomized_svd
from sklearn.datasets.samples_generator import make_low_rank_matrix
from sklearn.datasets import make_low_rank_matrix


def compute_bench(samples_range, features_range, n_iter=3, rank=50):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_sgd_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression

"""
Benchmark for SGD regression
Expand Down
2 changes: 1 addition & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ doctest:
"results in $(BUILDDIR)/doctest/output.txt."

download-data:
python -c "from sklearn.datasets.lfw import _check_fetch_lfw; _check_fetch_lfw()"
python -c "from sklearn.datasets._lfw import _check_fetch_lfw; _check_fetch_lfw()"

# Optimize PNG files. Needs OptiPNG. Change the -P argument to the number of
# cores you have available, so -P 64 if you have a real computer ;)
Expand Down
4 changes: 2 additions & 2 deletions doc/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import check_skip_network
from sklearn.datasets import get_data_home
from sklearn.datasets.base import _pkl_filepath
from sklearn.datasets.twenty_newsgroups import CACHE_NAME
from sklearn.datasets._base import _pkl_filepath
from sklearn.datasets._twenty_newsgroups import CACHE_NAME


def setup_labeled_faces():
Expand Down
2 changes: 1 addition & 1 deletion doc/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ a :class:`~sklearn.ensemble.RandomForestRegressor` that has been fitted with
the best set of parameters. Read more in the :ref:`User Guide
<grid_search>`::

>>> from sklearn.datasets.california_housing import fetch_california_housing
>>> from sklearn.datasets import fetch_california_housing
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.model_selection import RandomizedSearchCV
>>> from sklearn.model_selection import train_test_split
Expand Down
2 changes: 1 addition & 1 deletion examples/applications/plot_prediction_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
Expand Down
30 changes: 28 additions & 2 deletions examples/applications/plot_species_distribution_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets.base import Bunch
from sklearn.utils import Bunch
from sklearn.datasets import fetch_species_distributions
from sklearn.datasets.species_distributions import construct_grids
from sklearn import svm, metrics

# if basemap is available, we'll use it.
Expand All @@ -61,6 +60,33 @@
print(__doc__)


def construct_grids(batch):
    """Construct the map grid from the batch object.

    Parameters
    ----------
    batch : Bunch
        The object returned by :func:`fetch_species_distributions`. Only the
        attributes ``x_left_lower_corner``, ``y_left_lower_corner``,
        ``grid_size``, ``Nx`` and ``Ny`` are read.

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages. ``xgrid``
        holds exactly ``batch.Nx`` coordinates and ``ygrid`` exactly
        ``batch.Ny``; the first coordinate on each axis lies one
        ``grid_size`` past the lower-left corner.
    """
    step = batch.grid_size

    # x,y coordinates of the first grid cell: one step past the corner
    xmin = batch.x_left_lower_corner + step
    ymin = batch.y_left_lower_corner + step

    # Build each axis from an integer index instead of
    # np.arange(start, stop, step): with a floating-point step, rounding in
    # (stop - start) / step can yield one element too many, which would no
    # longer match the Nx / Ny cells of the coverage grid.
    # x coordinates of the grid cells
    xgrid = xmin + step * np.arange(batch.Nx)
    # y coordinates of the grid cells
    ygrid = ymin + step * np.arange(batch.Ny)

    return (xgrid, ygrid)


def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
"""Create a bunch with information about a particular organism
Expand Down
2 changes: 1 addition & 1 deletion examples/bicluster/plot_bicluster_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from sklearn.cluster import SpectralCoclustering
from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets.twenty_newsgroups import fetch_20newsgroups
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.cluster import v_measure_score

Expand Down
9 changes: 7 additions & 2 deletions examples/bicluster/plot_spectral_biclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
from matplotlib import pyplot as plt

from sklearn.datasets import make_checkerboard
from sklearn.datasets import samples_generator as sg
from sklearn.cluster import SpectralBiclustering
from sklearn.metrics import consensus_score


n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
shape=(300, 300), n_clusters=n_clusters, noise=10,
Expand All @@ -36,7 +36,12 @@
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

data, row_idx, col_idx = sg._shuffle(data, random_state=0)
# shuffle clusters
rng = np.random.RandomState(0)
row_idx = rng.permutation(data.shape[0])
col_idx = rng.permutation(data.shape[1])
data = data[row_idx][:, col_idx]

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

Expand Down
8 changes: 6 additions & 2 deletions examples/bicluster/plot_spectral_coclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from matplotlib import pyplot as plt

from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster import SpectralCoclustering
from sklearn.metrics import consensus_score

Expand All @@ -34,7 +33,12 @@
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

data, row_idx, col_idx = sg._shuffle(data, random_state=0)
# shuffle clusters
rng = np.random.RandomState(0)
row_idx = rng.permutation(data.shape[0])
col_idx = rng.permutation(data.shape[1])
data = data[row_idx][:, col_idx]

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_affinity_propagation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs

# #############################################################################
# Generate sample data
Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_birch_vs_minibatchkmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import matplotlib.colors as colors

from sklearn.cluster import Birch, MiniBatchKMeans
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs


# Generate centers for the blobs so that it forms a 10 X 10 grid.
Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler


Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_mean_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs

# #############################################################################
# Generate sample data
Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_mini_batch_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs

# #############################################################################
# Generate sample data
Expand Down
2 changes: 1 addition & 1 deletion examples/cluster/plot_ward_structured_vs_unstructured.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets.samples_generator import make_swiss_roll
from sklearn.datasets import make_swiss_roll

# #############################################################################
# Generate data (swiss roll dataset)
Expand Down
6 changes: 2 additions & 4 deletions examples/compose/plot_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import fetch_20newsgroups
from sklearn.datasets.twenty_newsgroups import strip_newsgroup_footer
from sklearn.datasets.twenty_newsgroups import strip_newsgroup_quoting
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
Expand Down Expand Up @@ -69,8 +67,6 @@ def transform(self, posts):
features = np.empty(shape=(len(posts), 2), dtype=object)
for i, text in enumerate(posts):
headers, _, bod = text.partition('\n\n')
bod = strip_newsgroup_footer(bod)
bod = strip_newsgroup_quoting(bod)
features[i, 1] = bod

prefix = 'Subject:'
Expand Down Expand Up @@ -124,10 +120,12 @@ def transform(self, posts):
X_train, y_train = fetch_20newsgroups(random_state=1,
subset='train',
categories=categories,
remove=('footers', 'quotes'),
return_X_y=True)
X_test, y_test = fetch_20newsgroups(random_state=1,
subset='test',
categories=categories,
remove=('footers', 'quotes'),
return_X_y=True)

pipeline.fit(X_train, y_train)
Expand Down
4 changes: 2 additions & 2 deletions examples/feature_selection/plot_feature_selection_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
the original feature space.
"""
from sklearn import svm
from sklearn.datasets import samples_generator
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
Expand All @@ -19,7 +19,7 @@
print(__doc__)

# import some data to play with
X, y = samples_generator.make_classification(
X, y = make_classification(
n_features=20, n_informative=3, n_redundant=0, n_classes=4,
n_clusters_per_class=2)

Expand Down
2 changes: 1 addition & 1 deletion examples/inspection/plot_partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.datasets.california_housing import fetch_california_housing
from sklearn.datasets import fetch_california_housing


##############################################################################
Expand Down
2 changes: 1 addition & 1 deletion examples/linear_model/plot_lasso_dense_vs_sparse_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from scipy import sparse
from scipy import linalg

from sklearn.datasets.samples_generator import make_regression
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso


Expand Down
2 changes: 1 addition & 1 deletion examples/linear_model/plot_sgd_separating_hyperplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs

# we create 50 separable points
X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
Expand Down
2 changes: 1 addition & 1 deletion examples/manifold/plot_compare_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
Axes3D

n_points = 1000
X, color = datasets.samples_generator.make_s_curve(n_points, random_state=0)
X, color = datasets.make_s_curve(n_points, random_state=0)
n_neighbors = 10
n_components = 2

Expand Down
2 changes: 1 addition & 1 deletion examples/manifold/plot_swissroll.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets
X, color = datasets.samples_generator.make_swiss_roll(n_samples=1500)
X, color = datasets.make_swiss_roll(n_samples=1500)

print("Computing LLE embedding")
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
Expand Down
2 changes: 1 addition & 1 deletion examples/manifold/plot_t_sne_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
ax.axis('tight')

# Another example using s-curve
X, color = datasets.samples_generator.make_s_curve(n_samples, random_state=0)
X, color = datasets.make_s_curve(n_samples, random_state=0)

ax = subplots[1][0]
ax.scatter(X[:, 0], X[:, 2], c=color)
Expand Down
Loading

0 comments on commit 4a95e33

Please sign in to comment.