Skip to content

Commit

Permalink
Deprecation of Spark ML APIs in sparkdl (databricks#213)
Browse files Browse the repository at this point in the history
Deprecation on graph/udf submodule of sparkdl, plus the various Spark ML Transformers and Estimators.
  • Loading branch information
WeichenXu123 authored and jkbradley committed Dec 18, 2019
1 parent 73b8233 commit 07bde63
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 0 deletions.
1 change: 1 addition & 0 deletions dev/dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ argcomplete==1.9.4
pylint==1.8.4
prospector==0.12.7
yapf==0.21.0
Deprecated==1.2.7
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ dependencies:
- yapf==0.21.0
# docs
- sphinx
- Deprecated==1.2.7
1 change: 1 addition & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ PyNaCl==1.2.1 # Note: This is a transitive dependency of paramiko v 1.3.0 fails
cloudpickle>=0.5.2
horovod>=0.16.0
wrapt==1.10.11
Deprecated==1.2.7
4 changes: 4 additions & 0 deletions python/sparkdl/estimators/keras_image_file_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import absolute_import, division, print_function

import threading
from deprecated import deprecated
import numpy as np

from pyspark.ml import Estimator
Expand Down Expand Up @@ -70,6 +71,9 @@ def __next__(self):
return self.models[index]


@deprecated(reason="KerasImageFileEstimator will be removed in the next release of sparkdl. "
"To replace a KerasImageFileEstimator workflow, please use "
"Distributed Hyperopt with SparkTrials to distribute model tuning.")
class KerasImageFileEstimator(Estimator, HasInputCol, HasOutputCol, HasLabelCol, HasKerasModel,
HasKerasOptimizer, HasKerasLoss, CanLoadImage, HasOutputMode):
"""
Expand Down
3 changes: 3 additions & 0 deletions python/sparkdl/graph/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#
from __future__ import absolute_import, division, print_function

from deprecated import deprecated
import tensorflow as tf
from tensorflow.core.protobuf import meta_graph_pb2 # pylint: disable=no-name-in-module

Expand All @@ -24,6 +25,8 @@
# pylint: disable=invalid-name,wrong-spelling-in-comment,wrong-spelling-in-docstring


@deprecated(reason="TFInputGraph will be removed in next release of sparkdl. "
"Please use Pandas UDF for distributed model inference.")
class TFInputGraph(object):
"""
An opaque object containing TensorFlow graph.
Expand Down
4 changes: 4 additions & 0 deletions python/sparkdl/graph/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import logging

import six
from deprecated import deprecated
import tensorflow as tf

logger = logging.getLogger('sparkdl')
Expand All @@ -33,6 +34,7 @@

__all__ = ['strip_and_freeze_until']


def validated_graph(graph):
"""
Check if the input is a valid :py:class:`tf.Graph` and return it.
Expand Down Expand Up @@ -196,6 +198,8 @@ def validated_input(tfobj_or_name, graph):
return name


@deprecated(reason="strip_and_freeze_until() will be removed in next release of sparkdl. "
"Please use Pandas UDF for distributed model inference.")
def strip_and_freeze_until(fetches, graph, sess=None, return_graph=False):
"""
Create a static view of the graph by
Expand Down
7 changes: 7 additions & 0 deletions python/sparkdl/image/imageIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from io import BytesIO
from collections import namedtuple
import warnings

# 3rd party
import numpy as np
Expand Down Expand Up @@ -173,6 +174,8 @@ def createResizeImageUDF(size):
:param size: tuple, target size of new image in the form (height, width).
:return: udf, a udf for resizing an image column to `size`.
"""
warnings.warn("createResizeImageUDF() will be removed in the next release of sparkdl. "
"Please use Pandas UDF instead.", DeprecationWarning)
if len(size) != 2:
raise ValueError(
"New image size should have format [height, width] but got {}".format(size))
Expand Down Expand Up @@ -212,6 +215,8 @@ def PIL_decode(raw_bytes):
:param raw_bytes:
:return: image data as an array in CV_8UC3 format
"""
warnings.warn("PIL_decode() will be removed in the next release of sparkdl. "
"Please use pillow and Pandas UDF instead.", DeprecationWarning)
return PIL_to_imageStruct(Image.open(BytesIO(raw_bytes)))


Expand All @@ -237,6 +242,8 @@ def readImagesWithCustomFn(path, decode_f, numPartition=None):
:param numPartition: [optional] int, number or partitions to use for reading files.
:return: DataFrame with schema == ImageSchema.imageSchema.
"""
warnings.warn("readImagesWithCustomFn() will be removed in the next release of sparkdl. "
"Please use pillow and Pandas UDF instead.", DeprecationWarning)
return _readImagesWithCustomFn(path, decode_f, numPartition, sc=SparkContext.getOrCreate())


Expand Down
3 changes: 3 additions & 0 deletions python/sparkdl/transformers/keras_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
#

from deprecated import deprecated
from pyspark.ml import Transformer
from sparkdl.param import CanLoadImage, HasInputCol, HasKerasModel, HasOutputCol, HasOutputMode, \
keyword_only
Expand All @@ -22,6 +23,8 @@
# pylint: disable=duplicate-code


@deprecated(reason="KerasImageFileTransformer will be removed in the next release of sparkdl. "
"Please use binary file data source and Pandas UDF instead.")
class KerasImageFileTransformer(Transformer, HasInputCol, HasOutputCol,
CanLoadImage, HasKerasModel, HasOutputMode):
"""
Expand Down
4 changes: 4 additions & 0 deletions python/sparkdl/transformers/keras_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from deprecated import deprecated
from pyspark.ml import Transformer
from sparkdl.graph.input import TFInputGraph
from sparkdl.param import HasInputCol, HasKerasModel, HasOutputCol, keyword_only
Expand All @@ -21,6 +23,8 @@
# pylint: disable=duplicate-code


@deprecated(reason="KerasTransformer will be removed in the next release of sparkdl. "
"Please use Pandas UDF instead.")
class KerasTransformer(Transformer, HasInputCol, HasOutputCol, HasKerasModel):
"""
Applies a Tensorflow-backed Keras model (specified by a file name) to
Expand Down
5 changes: 5 additions & 0 deletions python/sparkdl/transformers/named_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
#

from deprecated import deprecated
from keras.applications.imagenet_utils import decode_predictions
import numpy as np
import py4j
Expand All @@ -36,6 +37,8 @@
SUPPORTED_MODELS = ["InceptionV3", "Xception", "ResNet50", "VGG16", "VGG19"]


@deprecated(reason="DeepImagePredictor will be removed in the next release of sparkdl. "
"Please use Pandas UDF instead.")
class DeepImagePredictor(Transformer, HasInputCol, HasOutputCol):
"""
Applies the model specified by its popular name to the image column in DataFrame.
Expand Down Expand Up @@ -150,6 +153,8 @@ def __call__(self, value):
return self._sizeHintConverter(value)


@deprecated(reason="DeepImageFeaturizer will be removed in the next release of sparkdl. "
"Please use Pandas UDF instead.")
class DeepImageFeaturizer(JavaTransformer):
"""
Applies the model specified by its popular name, with its prediction layer(s) chopped off,
Expand Down
3 changes: 3 additions & 0 deletions python/sparkdl/transformers/tf_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
#

from deprecated import deprecated
import numpy as np
import tensorflow as tf
import tensorframes as tfs # pylint: disable=import-error
Expand All @@ -38,6 +39,8 @@
NEW_OUTPUT_PREFIX = 'sdl_flattened'


@deprecated(reason="TFImageTransformer will be removed in the next release of sparkdl. "
"Please use Pandas UDF for distributed model inference.")
class TFImageTransformer(Transformer, HasInputCol, HasOutputCol, HasOutputMode):
"""
Applies the Tensorflow graph to the image column in DataFrame.
Expand Down
4 changes: 4 additions & 0 deletions python/sparkdl/transformers/tf_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from __future__ import absolute_import, division, print_function

import logging

from deprecated import deprecated
import tensorflow as tf
# pylint: disable=no-name-in-module
from tensorflow.python.tools import optimize_for_inference_lib as infr_opt
Expand All @@ -33,6 +35,8 @@
logger = logging.getLogger('sparkdl')


@deprecated(reason="TFTransformer will be removed in the next release of sparkdl. "
"Please use Pandas UDF for distributed model inference.")
class TFTransformer(Transformer, HasTFInputGraph, HasTFHParams, HasInputMapping, HasOutputMapping):
"""
Applies the TensorFlow graph to the array column in DataFrame.
Expand Down
3 changes: 3 additions & 0 deletions python/sparkdl/transformers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
#

import warnings
import tensorflow as tf

# image stuff
Expand All @@ -25,6 +26,8 @@ def imageInputPlaceholder(nChannels=None):
'''
Inserts a TensorFlow placeholder for imput images.
'''
warnings.warn("imageInputPlaceholder() will be removed in the next release of sparkdl. "
"Please use Pandas UDF instead.", DeprecationWarning)
return tf.placeholder(tf.float32, [None, None, None, nChannels],
name=IMAGE_INPUT_PLACEHOLDER_NAME)

Expand Down
5 changes: 5 additions & 0 deletions python/sparkdl/udf/keras_image_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import logging
import warnings

from pyspark.ml.image import ImageSchema
from sparkdl.graph.builder import GraphFunction, IsolatedSession
Expand All @@ -27,6 +28,7 @@

__all__ = ['registerKerasImageUDF']


def registerKerasImageUDF(udf_name, keras_model_or_file_path, preprocessor=None):
"""
Create a Keras image model as a Spark SQL UDF.
Expand Down Expand Up @@ -89,6 +91,9 @@ def keras_load_img(fpath):
in the correct shape to be served as input to the Keras model
:return: :py:class:`GraphFunction`, the graph function for the Keras image model
"""
warnings.warn("registerKerasImageUDF() will be removed in the next release of sparkdl. "
"Please use Pandas UDF for distributed model inference.",
DeprecationWarning)
ordered_udf_names = []
keras_udf_name = udf_name
if preprocessor is not None:
Expand Down
11 changes: 11 additions & 0 deletions python/sparkdl/utils/keras_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import shutil
import tempfile
import warnings

import keras
from keras.models import load_model as _load_keras_hdf5_model
Expand All @@ -32,6 +33,8 @@ def model_to_bytes(model):
This saves the Keras model to a temp file as an intermediate step.
:return: str containing the model data
"""
warnings.warn("model_to_bytes() will be removed in the next release of sparkdl. "
"Please use model weights directly.", DeprecationWarning)
temp_dir = tempfile.mkdtemp()
temp_path = os.path.join(temp_dir, "model.h5")
try:
Expand All @@ -48,6 +51,8 @@ def bytes_to_h5file(modelBytes):
Dump HDF5 file content bytes to a local file
:return: path to the file
"""
warnings.warn("bytes_to_h5file() will be removed in the next release of sparkdl. "
"Please use model weights directly.", DeprecationWarning)
temp_dir = tempfile.mkdtemp()
temp_path = os.path.join(temp_dir, "model.h5")
with open(temp_path, mode='wb') as fout:
Expand All @@ -60,6 +65,8 @@ def bytes_to_model(modelBytes, remove_temp_path=True):
Convert a Keras model from a byte string to a Keras model instance.
This saves the Keras model to a temp file as an intermediate step.
"""
warnings.warn("bytes_to_model() will be removed in the next release of sparkdl. "
"Please use model weights directly.", DeprecationWarning)
temp_path = bytes_to_h5file(modelBytes)
try:
model = _load_keras_hdf5_model(temp_path)
Expand All @@ -81,6 +88,8 @@ def _get_loss_function(identifier):

def is_valid_loss_function(identifier):
""" Check if a named loss function is supported in Keras """
warnings.warn("is_valid_loss_function() will be removed in the next release of sparkdl. "
"Please check Keras for valid loss functions.", DeprecationWarning)
try:
_loss = _get_loss_function(identifier)
return _loss is not None
Expand All @@ -99,6 +108,8 @@ def _get_optimizer(identifier):

def is_valid_optimizer(identifier):
""" Check if a named optimizer is supported in Keras """
warnings.warn("is_valid_optimizer() will be removed in the next release of sparkdl. "
"Please check Keras for valid optimizers.", DeprecationWarning)
try:
_optim = _get_optimizer(identifier)
return _optim is not None
Expand Down

0 comments on commit 07bde63

Please sign in to comment.