@@ -32,204 +32,204 @@ class DoubleRDDSuite extends FunSuite with SharedSparkContext {
32
32
test(" WorksOnEmpty" ) {
33
33
// Make sure that it works on an empty input
34
34
val rdd : RDD [Double ] = sc.parallelize(Seq ())
35
- val buckets : Array [ Double ] = Array (0.0 , 10.0 )
36
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
37
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
38
- val expectedHistogramResults : Array [ Long ] = Array (0 )
35
+ val buckets = Array (0.0 , 10.0 )
36
+ val histogramResults = rdd.histogram(buckets)
37
+ val histogramResults2 = rdd.histogram(buckets, true )
38
+ val expectedHistogramResults = Array (0 )
39
39
assert(histogramResults === expectedHistogramResults)
40
40
assert(histogramResults2 === expectedHistogramResults)
41
41
}
42
42
test(" WorksWithOutOfRangeWithOneBucket" ) {
43
43
// Verify that if all of the elements are out of range the counts are zero
44
- val rdd : RDD [ Double ] = sc.parallelize(Seq (10.01 , - 0.01 ))
45
- val buckets : Array [ Double ] = Array (0.0 , 10.0 )
46
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
47
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
48
- val expectedHistogramResults : Array [ Long ] = Array (0 )
44
+ val rdd = sc.parallelize(Seq (10.01 , - 0.01 ))
45
+ val buckets = Array (0.0 , 10.0 )
46
+ val histogramResults = rdd.histogram(buckets)
47
+ val histogramResults2 = rdd.histogram(buckets, true )
48
+ val expectedHistogramResults = Array (0 )
49
49
assert(histogramResults === expectedHistogramResults)
50
50
assert(histogramResults2 === expectedHistogramResults)
51
51
}
52
52
test(" WorksInRangeWithOneBucket" ) {
53
53
// Verify the basic case of one bucket and all elements in that bucket works
54
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
55
- val buckets : Array [ Double ] = Array (0.0 , 10.0 )
56
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
57
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
58
- val expectedHistogramResults : Array [ Long ] = Array (4 )
54
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
55
+ val buckets = Array (0.0 , 10.0 )
56
+ val histogramResults = rdd.histogram(buckets)
57
+ val histogramResults2 = rdd.histogram(buckets, true )
58
+ val expectedHistogramResults = Array (4 )
59
59
assert(histogramResults === expectedHistogramResults)
60
60
assert(histogramResults2 === expectedHistogramResults)
61
61
}
62
62
test(" WorksInRangeWithOneBucketExactMatch" ) {
63
63
// Verify the case of one bucket whose boundaries exactly match the smallest and largest elements
64
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
65
- val buckets : Array [ Double ] = Array (1.0 , 4.0 )
66
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
67
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
68
- val expectedHistogramResults : Array [ Long ] = Array (4 )
64
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
65
+ val buckets = Array (1.0 , 4.0 )
66
+ val histogramResults = rdd.histogram(buckets)
67
+ val histogramResults2 = rdd.histogram(buckets, true )
68
+ val expectedHistogramResults = Array (4 )
69
69
assert(histogramResults === expectedHistogramResults)
70
70
assert(histogramResults2 === expectedHistogramResults)
71
71
}
72
72
test(" WorksWithOutOfRangeWithTwoBuckets" ) {
73
73
// Verify that out of range works with two buckets
74
- val rdd : RDD [ Double ] = sc.parallelize(Seq (10.01 , - 0.01 ))
75
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 10.0 )
76
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
77
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
78
- val expectedHistogramResults : Array [ Long ] = Array (0 , 0 )
74
+ val rdd = sc.parallelize(Seq (10.01 , - 0.01 ))
75
+ val buckets = Array (0.0 , 5.0 , 10.0 )
76
+ val histogramResults = rdd.histogram(buckets)
77
+ val histogramResults2 = rdd.histogram(buckets, true )
78
+ val expectedHistogramResults = Array (0 , 0 )
79
79
assert(histogramResults === expectedHistogramResults)
80
80
assert(histogramResults2 === expectedHistogramResults)
81
81
}
82
82
test(" WorksWithOutOfRangeWithTwoUnEvenBuckets" ) {
83
83
// Verify that out-of-range elements are not counted with two uneven buckets
84
- val rdd : RDD [ Double ] = sc.parallelize(Seq (10.01 , - 0.01 ))
85
- val buckets : Array [ Double ] = Array (0.0 , 4.0 , 10.0 )
86
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
87
- val expectedHistogramResults : Array [ Long ] = Array (0 , 0 )
84
+ val rdd = sc.parallelize(Seq (10.01 , - 0.01 ))
85
+ val buckets = Array (0.0 , 4.0 , 10.0 )
86
+ val histogramResults = rdd.histogram(buckets)
87
+ val expectedHistogramResults = Array (0 , 0 )
88
88
assert(histogramResults === expectedHistogramResults)
89
89
}
90
90
test(" WorksInRangeWithTwoBuckets" ) {
91
91
// Make sure that it works with two equally spaced buckets and elements in each
92
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 ))
93
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 10.0 )
94
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
95
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
96
- val expectedHistogramResults : Array [ Long ] = Array (3 , 2 )
92
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 ))
93
+ val buckets = Array (0.0 , 5.0 , 10.0 )
94
+ val histogramResults = rdd.histogram(buckets)
95
+ val histogramResults2 = rdd.histogram(buckets, true )
96
+ val expectedHistogramResults = Array (3 , 2 )
97
97
assert(histogramResults === expectedHistogramResults)
98
98
assert(histogramResults2 === expectedHistogramResults)
99
99
}
100
100
test(" WorksInRangeWithTwoBucketsAndNaN" ) {
101
101
// Make sure that it works with two equally spaced buckets and elements in each, and that NaN is not counted
102
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 , Double .NaN ))
103
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 10.0 )
104
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
105
- val histogramResults2 : Array [ Long ] = rdd.histogram(buckets, true )
106
- val expectedHistogramResults : Array [ Long ] = Array (3 , 2 )
102
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 , Double .NaN ))
103
+ val buckets = Array (0.0 , 5.0 , 10.0 )
104
+ val histogramResults = rdd.histogram(buckets)
105
+ val histogramResults2 = rdd.histogram(buckets, true )
106
+ val expectedHistogramResults = Array (3 , 2 )
107
107
assert(histogramResults === expectedHistogramResults)
108
108
assert(histogramResults2 === expectedHistogramResults)
109
109
}
110
110
test(" WorksInRangeWithTwoUnevenBuckets" ) {
111
111
// Make sure that it works with two unequally spaced buckets and elements in each
112
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 ))
113
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 )
114
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
115
- val expectedHistogramResults : Array [ Long ] = Array (3 , 2 )
112
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 5 , 6 ))
113
+ val buckets = Array (0.0 , 5.0 , 11.0 )
114
+ val histogramResults = rdd.histogram(buckets)
115
+ val expectedHistogramResults = Array (3 , 2 )
116
116
assert(histogramResults === expectedHistogramResults)
117
117
}
118
118
test(" WorksMixedRangeWithTwoUnevenBuckets" ) {
119
119
// Make sure that it works with two unequally spaced buckets and a mix of in-range and out-of-range elements
120
- val rdd : RDD [ Double ] = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.0 , 11.01 ))
121
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 )
122
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
123
- val expectedHistogramResults : Array [ Long ] = Array (4 , 3 )
120
+ val rdd = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.0 , 11.01 ))
121
+ val buckets = Array (0.0 , 5.0 , 11.0 )
122
+ val histogramResults = rdd.histogram(buckets)
123
+ val expectedHistogramResults = Array (4 , 3 )
124
124
assert(histogramResults === expectedHistogramResults)
125
125
}
126
126
test(" WorksMixedRangeWithFourUnevenBuckets" ) {
127
127
// Make sure that it works with four unequally spaced buckets and a mix of in-range and out-of-range elements
128
- val rdd : RDD [ Double ] = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
128
+ val rdd = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
129
129
200.0 , 200.1 ))
130
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 )
131
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
132
- val expectedHistogramResults : Array [ Long ] = Array (4 , 2 , 1 , 3 )
130
+ val buckets = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 )
131
+ val histogramResults = rdd.histogram(buckets)
132
+ val expectedHistogramResults = Array (4 , 2 , 1 , 3 )
133
133
assert(histogramResults === expectedHistogramResults)
134
134
}
135
135
test(" WorksMixedRangeWithUnevenBucketsAndNaN" ) {
136
136
// Make sure that it works with four unequally spaced buckets, out-of-range elements, and a NaN element
137
- val rdd : RDD [ Double ] = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
137
+ val rdd = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
138
138
200.0 , 200.1 , Double .NaN ))
139
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 )
140
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
141
- val expectedHistogramResults : Array [ Long ] = Array (4 , 2 , 1 , 3 )
139
+ val buckets = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 )
140
+ val histogramResults = rdd.histogram(buckets)
141
+ val expectedHistogramResults = Array (4 , 2 , 1 , 3 )
142
142
assert(histogramResults === expectedHistogramResults)
143
143
}
144
144
// Make sure this works with a NaN end bucket
145
145
test(" WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRange" ) {
146
146
// Make sure that it works with five unequally spaced buckets whose last boundary is NaN
147
- val rdd : RDD [ Double ] = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
147
+ val rdd = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
148
148
200.0 , 200.1 , Double .NaN ))
149
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 , Double .NaN )
150
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
151
- val expectedHistogramResults : Array [ Long ] = Array (4 , 2 , 1 , 2 , 3 )
149
+ val buckets = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 , Double .NaN )
150
+ val histogramResults = rdd.histogram(buckets)
151
+ val expectedHistogramResults = Array (4 , 2 , 1 , 2 , 3 )
152
152
assert(histogramResults === expectedHistogramResults)
153
153
}
154
154
// Make sure this works with a NaN end bucket and infinite values
155
155
test(" WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfity" ) {
156
156
// Make sure that it works with five unequally spaced buckets, a NaN last boundary, and infinite elements
157
- val rdd : RDD [ Double ] = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
157
+ val rdd = sc.parallelize(Seq (- 0.01 , 0.0 , 1 , 2 , 3 , 5 , 6 , 11.01 , 12.0 , 199.0 ,
158
158
200.0 , 200.1 , 1.0 / 0.0 , - 1.0 / 0.0 , Double .NaN ))
159
- val buckets : Array [ Double ] = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 , Double .NaN )
160
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
161
- val expectedHistogramResults : Array [ Long ] = Array (4 , 2 , 1 , 2 , 4 )
159
+ val buckets = Array (0.0 , 5.0 , 11.0 , 12.0 , 200.0 , Double .NaN )
160
+ val histogramResults = rdd.histogram(buckets)
161
+ val expectedHistogramResults = Array (4 , 2 , 1 , 2 , 4 )
162
162
assert(histogramResults === expectedHistogramResults)
163
163
}
164
164
test(" WorksWithOutOfRangeWithInfiniteBuckets" ) {
165
165
// Verify that two buckets with infinite outer boundaries count every element except NaN
166
- val rdd : RDD [ Double ] = sc.parallelize(Seq (10.01 , - 0.01 , Double .NaN ))
167
- val buckets : Array [ Double ] = Array (- 1.0 / 0.0 , 0.0 , 1.0 / 0.0 )
168
- val histogramResults : Array [ Long ] = rdd.histogram(buckets)
169
- val expectedHistogramResults : Array [ Long ] = Array (1 , 1 )
166
+ val rdd = sc.parallelize(Seq (10.01 , - 0.01 , Double .NaN ))
167
+ val buckets = Array (- 1.0 / 0.0 , 0.0 , 1.0 / 0.0 )
168
+ val histogramResults = rdd.histogram(buckets)
169
+ val expectedHistogramResults = Array (1 , 1 )
170
170
assert(histogramResults === expectedHistogramResults)
171
171
}
172
172
// Test the failure mode with an invalid bucket array
173
173
test(" ThrowsExceptionOnInvalidBucketArray" ) {
174
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1.0 ))
174
+ val rdd = sc.parallelize(Seq (1.0 ))
175
175
// Empty array
176
176
intercept[IllegalArgumentException ] {
177
- val buckets : Array [ Double ] = Array .empty[Double ]
177
+ val buckets = Array .empty[Double ]
178
178
val result = rdd.histogram(buckets)
179
179
}
180
180
// Single element array
181
181
intercept[IllegalArgumentException ] {
182
- val buckets : Array [ Double ] = Array (1.0 )
182
+ val buckets = Array (1.0 )
183
183
val result = rdd.histogram(buckets)
184
184
}
185
185
}
186
186
187
187
// Test automatic histogram function
188
188
test(" WorksWithoutBucketsBasic" ) {
189
189
// Verify the basic case of one bucket and all elements in that bucket works
190
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
190
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
191
191
val (histogramBuckets, histogramResults) = rdd.histogram(1 )
192
- val expectedHistogramResults : Array [ Long ] = Array (4 )
193
- val expectedHistogramBuckets : Array [ Double ] = Array (1.0 , 4.0 )
192
+ val expectedHistogramResults = Array (4 )
193
+ val expectedHistogramBuckets = Array (1.0 , 4.0 )
194
194
assert(histogramResults === expectedHistogramResults)
195
195
assert(histogramBuckets === expectedHistogramBuckets)
196
196
}
197
197
// Test automatic histogram function with a single element
198
198
test(" WorksWithoutBucketsBasicSingleElement" ) {
199
199
// Verify the basic case of one bucket and all elements in that bucket works
200
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 ))
200
+ val rdd = sc.parallelize(Seq (1 ))
201
201
val (histogramBuckets, histogramResults) = rdd.histogram(1 )
202
- val expectedHistogramResults : Array [ Long ] = Array (1 )
203
- val expectedHistogramBuckets : Array [ Double ] = Array (1.0 , 1.0 )
202
+ val expectedHistogramResults = Array (1 )
203
+ val expectedHistogramBuckets = Array (1.0 , 1.0 )
204
204
assert(histogramResults === expectedHistogramResults)
205
205
assert(histogramBuckets === expectedHistogramBuckets)
206
206
}
207
207
// Test automatic histogram function when all elements are equal (zero-width range)
208
208
test(" WorksWithoutBucketsBasicNoRange" ) {
209
209
// Verify the basic case of one bucket and all elements in that bucket works
210
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 1 , 1 , 1 ))
210
+ val rdd = sc.parallelize(Seq (1 , 1 , 1 , 1 ))
211
211
val (histogramBuckets, histogramResults) = rdd.histogram(1 )
212
- val expectedHistogramResults : Array [ Long ] = Array (4 )
213
- val expectedHistogramBuckets : Array [ Double ] = Array (1.0 , 1.0 )
212
+ val expectedHistogramResults = Array (4 )
213
+ val expectedHistogramBuckets = Array (1.0 , 1.0 )
214
214
assert(histogramResults === expectedHistogramResults)
215
215
assert(histogramBuckets === expectedHistogramBuckets)
216
216
}
217
217
test(" WorksWithoutBucketsBasicTwo" ) {
218
218
// Verify the basic case of two evenly spaced buckets with elements in each
219
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
219
+ val rdd = sc.parallelize(Seq (1 , 2 , 3 , 4 ))
220
220
val (histogramBuckets, histogramResults) = rdd.histogram(2 )
221
- val expectedHistogramResults : Array [ Long ] = Array (2 , 2 )
222
- val expectedHistogramBuckets : Array [ Double ] = Array (1.0 , 2.5 , 4.0 )
221
+ val expectedHistogramResults = Array (2 , 2 )
222
+ val expectedHistogramBuckets = Array (1.0 , 2.5 , 4.0 )
223
223
assert(histogramResults === expectedHistogramResults)
224
224
assert(histogramBuckets === expectedHistogramBuckets)
225
225
}
226
226
test(" WorksWithoutBucketsWithMoreRequestedThanElements" ) {
227
227
// Verify that requesting more buckets than there are elements leaves the interior buckets empty
228
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 2 ))
228
+ val rdd = sc.parallelize(Seq (1 , 2 ))
229
229
val (histogramBuckets, histogramResults) = rdd.histogram(10 )
230
- val expectedHistogramResults : Array [ Long ] =
230
+ val expectedHistogramResults =
231
231
Array (1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 )
232
- val expectedHistogramBuckets : Array [ Double ] =
232
+ val expectedHistogramBuckets =
233
233
Array (1.0 , 1.1 , 1.2 , 1.3 , 1.4 , 1.5 , 1.6 , 1.7 , 1.8 , 1.9 , 2.0 )
234
234
assert(histogramResults === expectedHistogramResults)
235
235
assert(histogramBuckets === expectedHistogramBuckets)
@@ -239,12 +239,12 @@ class DoubleRDDSuite extends FunSuite with SharedSparkContext {
239
239
test(" ThrowsExceptionOnInvalidRDDs" ) {
240
240
// infinity
241
241
intercept[UnsupportedOperationException ] {
242
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , 1.0 / 0.0 ))
242
+ val rdd = sc.parallelize(Seq (1 , 1.0 / 0.0 ))
243
243
val result = rdd.histogram(1 )
244
244
}
245
245
// NaN
246
246
intercept[UnsupportedOperationException ] {
247
- val rdd : RDD [ Double ] = sc.parallelize(Seq (1 , Double .NaN ))
247
+ val rdd = sc.parallelize(Seq (1 , Double .NaN ))
248
248
val result = rdd.histogram(1 )
249
249
}
250
250
// Empty
0 commit comments