diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 9ea639dc4f960..4eb17bfdcca90 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -394,7 +394,6 @@ def test_fit_maximize_metric(self): if __name__ == "__main__": - from pyspark.ml.tests import * if xmlrunner: unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports')) else: diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index ea7d297cba2ae..32ed48e10388e 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -77,24 +77,21 @@ pass ser = PickleSerializer() +sc = SparkContext('local[4]', "MLlib tests") class MLlibTestCase(unittest.TestCase): def setUp(self): - self.sc = SparkContext('local[4]', "MLlib tests") - - def tearDown(self): - self.sc.stop() + self.sc = sc class MLLibStreamingTestCase(unittest.TestCase): def setUp(self): - self.sc = SparkContext('local[4]', "MLlib tests") + self.sc = sc self.ssc = StreamingContext(self.sc, 1.0) def tearDown(self): self.ssc.stop(False) - self.sc.stop() @staticmethod def _eventually(condition, timeout=30.0, catch_assertions=False): @@ -1169,7 +1166,7 @@ def test_predictOn_model(self): clusterWeights=[1.0, 1.0, 1.0, 1.0]) predict_data = [[[1.5, 1.5]], [[-1.5, 1.5]], [[-1.5, -1.5]], [[1.5, -1.5]]] - predict_data = [self.sc.parallelize(batch, 1) for batch in predict_data] + predict_data = [sc.parallelize(batch, 1) for batch in predict_data] predict_stream = self.ssc.queueStream(predict_data) predict_val = stkm.predictOn(predict_stream) @@ -1200,7 +1197,7 @@ def test_trainOn_predictOn(self): # classification based in the initial model would have been 0 # proving that the model is updated. batches = [[[-0.5], [0.6], [0.8]], [[0.2], [-0.1], [0.3]]] - batches = [self.sc.parallelize(batch) for batch in batches] + batches = [sc.parallelize(batch) for batch in batches] input_stream = self.ssc.queueStream(batches) predict_results = [] @@ -1233,7 +1230,7 @@ def test_dim(self): self.assertEqual(len(point.features), 3) linear_data = LinearDataGenerator.generateLinearRDD( - sc=self.sc, nexamples=6, nfeatures=2, eps=0.1, + sc=sc, nexamples=6, nfeatures=2, eps=0.1, nParts=2, intercept=0.0).collect() self.assertEqual(len(linear_data), 6) for point in linear_data: @@ -1409,7 +1406,7 @@ def test_parameter_accuracy(self): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0, 10.0], xMean, xVariance, 100, 42 + i, 0.1) - batches.append(self.sc.parallelize(batch)) + batches.append(sc.parallelize(batch)) input_stream = self.ssc.queueStream(batches) slr.trainOn(input_stream) @@ -1433,7 +1430,7 @@ def test_parameter_convergence(self): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1) - batches.append(self.sc.parallelize(batch)) + batches.append(sc.parallelize(batch)) model_weights = [] input_stream = self.ssc.queueStream(batches) @@ -1466,7 +1463,7 @@ def test_prediction(self): 0.0, [10.0, 10.0], [0.0, 0.0], [1.0 / 3.0, 1.0 / 3.0], 100, 42 + i, 0.1) batches.append( - self.sc.parallelize(batch).map(lambda lp: (lp.label, lp.features))) + sc.parallelize(batch).map(lambda lp: (lp.label, lp.features))) input_stream = self.ssc.queueStream(batches) output_stream = slr.predictOnValues(input_stream) @@ -1497,7 +1494,7 @@ def test_train_prediction(self): for i in range(10): batch = LinearDataGenerator.generateLinearInput( 0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1) - batches.append(self.sc.parallelize(batch)) + batches.append(sc.parallelize(batch)) predict_batches = [ b.map(lambda lp: (lp.label, lp.features)) for b in batches] @@ -1583,7 +1580,6 @@ def test_als_ratings_id_long_error(self): if __name__ == "__main__": - from pyspark.mllib.tests import * if not _have_scipy: print("NOTE: Skipping SciPy tests as it does not seem to be installed") if xmlrunner: diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index ae8620274dd20..c03cb9338ae68 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1259,7 +1259,6 @@ def test_collect_functions(self): if __name__ == "__main__": - from pyspark.sql.tests import * if xmlrunner: unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports')) else: diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py index 24b812615cbb4..86b05d9fd2424 100644 --- a/python/pyspark/streaming/tests.py +++ b/python/pyspark/streaming/tests.py @@ -1635,7 +1635,6 @@ def search_kinesis_asl_assembly_jar(): are_kinesis_tests_enabled = os.environ.get(kinesis_test_environ_var) == '1' if __name__ == "__main__": - from pyspark.streaming.tests import * kafka_assembly_jar = search_kafka_assembly_jar() flume_assembly_jar = search_flume_assembly_jar() mqtt_assembly_jar = search_mqtt_assembly_jar() diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 23720502a82c8..5bd94476597ab 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -2008,7 +2008,6 @@ def test_statcounter_array(self): if __name__ == "__main__": - from pyspark.tests import * if not _have_scipy: print("NOTE: Skipping SciPy tests as it does not seem to be installed") if not _have_numpy: