@@ -25,6 +25,17 @@ import org.apache.spark._
25
25
import org.apache.spark.SparkContext._
26
26
27
27
class ExternalSorterSuite extends FunSuite with LocalSparkContext {
28
/**
 * Builds a SparkConf preconfigured so every test in this suite exercises the
 * multi-batch spill-file code path with the Java serializer.
 *
 * @param loadDefaults whether to load values from system properties
 *                     (needed by local-cluster tests so SPARK_HOME is found)
 * @return a SparkConf with serializer and spill settings applied
 */
private def createSparkConf(loadDefaults: Boolean): SparkConf = {
  val conf = new SparkConf(loadDefaults)
  // Make the Java serializer write a reset instruction (TC_RESET) after each object to test
  // for a bug we had with bytes written past the last object in a batch (SPARK-2792)
  conf.set("spark.serializer.objectStreamReset", "1")
  conf.set("spark.serializer", "org.apache.spark.serializer.JavaSerializer")
  // Ensure that we actually have multiple batches per spill file
  conf.set("spark.shuffle.spill.batchSize", "10")
  conf
}
38
+
28
39
test(" empty data stream" ) {
29
40
val conf = new SparkConf (false )
30
41
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
@@ -60,7 +71,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
60
71
}
61
72
62
73
test(" few elements per partition" ) {
63
- val conf = new SparkConf (false )
74
+ val conf = createSparkConf (false )
64
75
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
65
76
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
66
77
sc = new SparkContext (" local" , " test" , conf)
@@ -102,7 +113,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
102
113
}
103
114
104
115
test(" empty partitions with spilling" ) {
105
- val conf = new SparkConf (false )
116
+ val conf = createSparkConf (false )
106
117
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
107
118
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
108
119
sc = new SparkContext (" local" , " test" , conf)
@@ -127,7 +138,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
127
138
}
128
139
129
140
test(" spilling in local cluster" ) {
130
- val conf = new SparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
141
+ val conf = createSparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
131
142
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
132
143
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
133
144
sc = new SparkContext (" local-cluster[1,1,512]" , " test" , conf)
@@ -198,7 +209,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
198
209
}
199
210
200
211
test(" spilling in local cluster with many reduce tasks" ) {
201
- val conf = new SparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
212
+ val conf = createSparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
202
213
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
203
214
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
204
215
sc = new SparkContext (" local-cluster[2,1,512]" , " test" , conf)
@@ -269,7 +280,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
269
280
}
270
281
271
282
test(" cleanup of intermediate files in sorter" ) {
272
- val conf = new SparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
283
+ val conf = createSparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
273
284
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
274
285
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
275
286
sc = new SparkContext (" local" , " test" , conf)
@@ -290,7 +301,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
290
301
}
291
302
292
303
test(" cleanup of intermediate files in sorter if there are errors" ) {
293
- val conf = new SparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
304
+ val conf = createSparkConf (true ) // Load defaults, otherwise SPARK_HOME is not found
294
305
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
295
306
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
296
307
sc = new SparkContext (" local" , " test" , conf)
@@ -311,7 +322,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
311
322
}
312
323
313
324
test(" cleanup of intermediate files in shuffle" ) {
314
- val conf = new SparkConf (false )
325
+ val conf = createSparkConf (false )
315
326
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
316
327
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
317
328
sc = new SparkContext (" local" , " test" , conf)
@@ -326,7 +337,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
326
337
}
327
338
328
339
test(" cleanup of intermediate files in shuffle with errors" ) {
329
- val conf = new SparkConf (false )
340
+ val conf = createSparkConf (false )
330
341
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
331
342
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
332
343
sc = new SparkContext (" local" , " test" , conf)
@@ -348,7 +359,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
348
359
}
349
360
350
361
test(" no partial aggregation or sorting" ) {
351
- val conf = new SparkConf (false )
362
+ val conf = createSparkConf (false )
352
363
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
353
364
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
354
365
sc = new SparkContext (" local" , " test" , conf)
@@ -363,7 +374,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
363
374
}
364
375
365
376
test(" partial aggregation without spill" ) {
366
- val conf = new SparkConf (false )
377
+ val conf = createSparkConf (false )
367
378
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
368
379
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
369
380
sc = new SparkContext (" local" , " test" , conf)
@@ -379,7 +390,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
379
390
}
380
391
381
392
test(" partial aggregation with spill, no ordering" ) {
382
- val conf = new SparkConf (false )
393
+ val conf = createSparkConf (false )
383
394
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
384
395
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
385
396
sc = new SparkContext (" local" , " test" , conf)
@@ -395,7 +406,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
395
406
}
396
407
397
408
test(" partial aggregation with spill, with ordering" ) {
398
- val conf = new SparkConf (false )
409
+ val conf = createSparkConf (false )
399
410
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
400
411
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
401
412
sc = new SparkContext (" local" , " test" , conf)
@@ -412,7 +423,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
412
423
}
413
424
414
425
test(" sorting without aggregation, no spill" ) {
415
- val conf = new SparkConf (false )
426
+ val conf = createSparkConf (false )
416
427
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
417
428
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
418
429
sc = new SparkContext (" local" , " test" , conf)
@@ -429,7 +440,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
429
440
}
430
441
431
442
test(" sorting without aggregation, with spill" ) {
432
- val conf = new SparkConf (false )
443
+ val conf = createSparkConf (false )
433
444
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
434
445
conf.set(" spark.shuffle.manager" , " org.apache.spark.shuffle.sort.SortShuffleManager" )
435
446
sc = new SparkContext (" local" , " test" , conf)
@@ -446,7 +457,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
446
457
}
447
458
448
459
test(" spilling with hash collisions" ) {
449
- val conf = new SparkConf (true )
460
+ val conf = createSparkConf (true )
450
461
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
451
462
sc = new SparkContext (" local-cluster[1,1,512]" , " test" , conf)
452
463
@@ -503,7 +514,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
503
514
}
504
515
505
516
test(" spilling with many hash collisions" ) {
506
- val conf = new SparkConf (true )
517
+ val conf = createSparkConf (true )
507
518
conf.set(" spark.shuffle.memoryFraction" , " 0.0001" )
508
519
sc = new SparkContext (" local-cluster[1,1,512]" , " test" , conf)
509
520
@@ -526,7 +537,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
526
537
}
527
538
528
539
test(" spilling with hash collisions using the Int.MaxValue key" ) {
529
- val conf = new SparkConf (true )
540
+ val conf = createSparkConf (true )
530
541
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
531
542
sc = new SparkContext (" local-cluster[1,1,512]" , " test" , conf)
532
543
@@ -547,7 +558,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext {
547
558
}
548
559
549
560
test(" spilling with null keys and values" ) {
550
- val conf = new SparkConf (true )
561
+ val conf = createSparkConf (true )
551
562
conf.set(" spark.shuffle.memoryFraction" , " 0.001" )
552
563
sc = new SparkContext (" local-cluster[1,1,512]" , " test" , conf)
553
564
0 commit comments