Changes on top of Prashant's patch.
Closes apache#316
pwendell committed Jan 4, 2014
1 parent bc311bb commit 9e6f3bd
Showing 10 changed files with 42 additions and 72 deletions.
19 changes: 3 additions & 16 deletions README.md
@@ -13,7 +13,7 @@ This README file only contains basic setup instructions.
## Building

Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
which can be obtained from [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
which can be obtained [here](http://www.scala-sbt.org). To build Spark and its example programs, run:

sbt assembly

@@ -38,24 +38,11 @@ locally with one thread, or "local[N]" to run locally with N threads.

## Running tests

### With sbt (Much faster to run compared to maven)
Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows using sbt.
Testing first requires [Building](#Building) Spark. Once Spark is built, tests
can be run using:

`sbt test`

### With maven.
1. Export these necessary environment variables as follows.

`export SCALA_HOME=<scala distribution>`

`export MAVEN_OPTS="-Xmx1512m -XX:MaxPermSize=512m"`

2. Build assembly by
`mvn package -DskipTests`

3. Run tests
`mvn test`

## A Note About Hadoop Versions

Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -173,7 +173,8 @@ class SparkContext(
value <- Option(System.getenv(key))) {
executorEnvs(key) = value
}
// A workaround for SPARK_TESTING and SPARK_HOME
// Convert java options to env vars as a work around
// since we can't set env vars directly in sbt.
for { (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} {
executorEnvs(envKey) = value
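
As a side note on the hunk above: the pattern is to prefer a real environment variable and fall back to a Java system property, which SBT can set through javaOptions even though it cannot set environment variables directly. A minimal, standalone sketch of that fallback; the wrapper object and the println are added purely for illustration:

```scala
import scala.collection.mutable

object EnvFallbackSketch {
  def main(args: Array[String]): Unit = {
    val executorEnvs = mutable.HashMap[String, String]()
    // Prefer the environment variable; fall back to the equivalent system property,
    // e.g. one set from SBT with javaOptions += "-Dspark.home=...".
    for {
      (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
      value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))
    } {
      executorEnvs(envKey) = value
    }
    println(executorEnvs)
  }
}
```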
3 changes: 1 addition & 2 deletions core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -30,8 +30,7 @@ import org.apache.spark.util.Utils

class DriverSuite extends FunSuite with Timeouts {
test("driver should exit after finishing") {
val sparkHome = Option(System.getenv("SPARK_HOME"))
.orElse(Option(System.getProperty("spark.home"))).get
val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
// Regression test for SPARK-530: "Spark driver process doesn't exit after finishing"
val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
forAll(masters) { (master: String) =>
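
The DriverSuite change above uses the same lookup order through sys.env and sys.props. A small illustrative sketch, not part of the patch, that swaps the bare .get for an explicit error message so a missing setting fails readably:

```scala
object SparkHomeLookup {
  // Same lookup order as DriverSuite: environment variable first, then the
  // spark.home system property; fail with a clear message if neither is set.
  def sparkHome: String =
    sys.env.get("SPARK_HOME")
      .orElse(sys.props.get("spark.home"))
      .getOrElse(sys.error("Neither SPARK_HOME nor the spark.home property is set"))

  def main(args: Array[String]): Unit = println(sparkHome)
}
```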
77 changes: 31 additions & 46 deletions core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -27,54 +27,39 @@ import org.scalatest.FunSuite
class FileServerSuite extends FunSuite with LocalSparkContext {

@transient var tmpFile: File = _
@transient var testJarFile: String = _

@transient var tmpJarUrl: String = _

override def beforeAll() {
super.beforeAll()
val buffer = new Array[Byte](10240)
val tmpdir = new File(Files.createTempDir(), "test")
tmpdir.mkdir()
val tmpJarEntry = new File(tmpdir, "FileServerSuite2.txt")
val pw = new PrintWriter(tmpJarEntry)
pw.println("test String in the file named FileServerSuite2.txt")
val tmpDir = new File(Files.createTempDir(), "test")
tmpDir.mkdir()

val textFile = new File(tmpDir, "FileServerSuite.txt")
val pw = new PrintWriter(textFile)
pw.println("100")
pw.close()
// The ugliest code possible, was translated from java.
val tmpFile2 = new File(tmpdir, "test.jar")
val stream = new FileOutputStream(tmpFile2)
val jar = new JarOutputStream(stream, new java.util.jar.Manifest())
val jarAdd = new JarEntry(tmpJarEntry.getName)
jarAdd.setTime(tmpJarEntry.lastModified)
jar.putNextEntry(jarAdd)
val in = new FileInputStream(tmpJarEntry)

val jarFile = new File(tmpDir, "test.jar")
val jarStream = new FileOutputStream(jarFile)
val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest())

val jarEntry = new JarEntry(textFile.getName)
jar.putNextEntry(jarEntry)

val in = new FileInputStream(textFile)
val buffer = new Array[Byte](10240)
var nRead = 0
while (nRead <= 0) {
nRead = in.read(buffer, 0, buffer.length)
jar.write(buffer, 0, nRead)
}

in.close()
jar.close()
stream.close()
testJarFile = tmpFile2.toURI.toURL.toString
}

override def beforeEach() {
super.beforeEach()
// Create a sample text file
val tmpdir = new File(Files.createTempDir(), "test")
tmpdir.mkdir()
tmpFile = new File(tmpdir, "FileServerSuite.txt")
val pw = new PrintWriter(tmpFile)
pw.println("100")
pw.close()
}
jarStream.close()

override def afterEach() {
super.afterEach()
// Clean up downloaded file
if (tmpFile.exists) {
tmpFile.delete()
}
tmpFile = textFile
tmpJarUrl = jarFile.toURI.toURL.toString
}

test("Distributing files locally") {
@@ -108,10 +93,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {

test ("Dynamically adding JARS locally") {
sc = new SparkContext("local[4]", "test")
sc.addJar(testJarFile)
sc.addJar(tmpJarUrl)
val testData = Array((1, 1))
sc.parallelize(testData).foreach { (x) =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
sc.parallelize(testData).foreach { x =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
throw new SparkException("jar not added")
}
}
@@ -133,21 +118,21 @@

test ("Dynamically adding JARS on a standalone cluster") {
sc = new SparkContext("local-cluster[1,1,512]", "test")
sc.addJar(testJarFile)
sc.addJar(tmpJarUrl)
val testData = Array((1,1))
sc.parallelize(testData).foreach { (x) =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
sc.parallelize(testData).foreach { x =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
throw new SparkException("jar not added")
}
}
}

test ("Dynamically adding JARS on a standalone cluster using local: URL") {
sc = new SparkContext("local-cluster[1,1,512]", "test")
sc.addJar(testJarFile.replace("file", "local"))
sc.addJar(tmpJarUrl.replace("file", "local"))
val testData = Array((1,1))
sc.parallelize(testData).foreach { (x) =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
sc.parallelize(testData).foreach { x =>
if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
throw new SparkException("jar not added")
}
}
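
For readers following the reworked beforeAll() above, here is a self-contained sketch of the same file-plus-jar setup. Object and path names are illustrative only, and the copy loop reads to end-of-file rather than reproducing the suite's single-pass while (nRead <= 0) loop:

```scala
import java.io.{File, FileInputStream, FileOutputStream, PrintWriter}
import java.util.jar.{JarEntry, JarOutputStream, Manifest}

object TestJarSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical working directory for the demo.
    val dir = new File(System.getProperty("java.io.tmpdir"), "fileserver-demo")
    dir.mkdirs()

    // Write a small text file, as FileServerSuite.txt is written in the suite.
    val textFile = new File(dir, "FileServerSuite.txt")
    val pw = new PrintWriter(textFile)
    pw.println("100")
    pw.close()

    // Package the text file into a jar via JarOutputStream.
    val jarFile = new File(dir, "test.jar")
    val jarStream = new JarOutputStream(new FileOutputStream(jarFile), new Manifest())
    jarStream.putNextEntry(new JarEntry(textFile.getName))

    val in = new FileInputStream(textFile)
    val buffer = new Array[Byte](10240)
    var nRead = in.read(buffer, 0, buffer.length)
    while (nRead > 0) {  // copy until EOF instead of the suite's single-read loop
      jarStream.write(buffer, 0, nRead)
      nRead = in.read(buffer, 0, buffer.length)
    }
    in.close()
    jarStream.close()

    println(s"Jar URL: ${jarFile.toURI.toURL}")
  }
}
```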
core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
@@ -19,18 +19,14 @@ package org.apache.spark.deploy.worker

import java.io.File

import scala.util.Try

import org.scalatest.FunSuite

import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription}


class ExecutorRunnerTest extends FunSuite {
test("command includes appId") {
def f(s:String) = new File(s)
val sparkHome = Try(sys.env("SPARK_HOME")).toOption
.orElse(Option(System.getProperty("spark.home"))).get
val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.env.get("spark.home")).get
val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()),
sparkHome, "appUiUrl")
val appId = "12345-worker321-9876"
3 changes: 2 additions & 1 deletion make-distribution.sh
@@ -45,7 +45,8 @@ DISTDIR="$FWDIR/dist"
export TERM=dumb # Prevents color codes in SBT output

if ! test `which sbt` ;then
echo -e "You need sbt installed and available on path, please follow the instructions here: http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html"
echo -e "You need sbt installed and available on your path."
echo -e "Download sbt from http://www.scala-sbt.org/"
exit -1;
fi

1 change: 1 addition & 0 deletions project/SparkBuild.scala
@@ -72,6 +72,7 @@ object SparkBuild extends Build {
val sparkHome = System.getProperty("user.dir")
System.setProperty("spark.home", sparkHome)
System.setProperty("spark.testing", "1")

// Allows build configuration to be set through environment variables
lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
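
The surrounding SparkBuild.scala context reads build configuration from environment variables through scala.util.Properties. A hedged sketch of that pattern follows; the default version string and the startsWith heuristic are assumptions for illustration, not taken from the patch:

```scala
import scala.util.Properties

object BuildConfigSketch {
  val DEFAULT_HADOOP_VERSION = "1.0.4"  // assumed default, for illustration only

  def main(args: Array[String]): Unit = {
    // Environment variable overrides the default when present.
    val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
    val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
      case Some(v) => v.toBoolean
      case None    => hadoopVersion.startsWith("2.")  // assumed heuristic, not from the patch
    }
    println(s"hadoopVersion=$hadoopVersion isNewHadoop=$isNewHadoop")
  }
}
```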
Binary file added python/lib/py4j-0.8.1-src.zip
Binary file removed python/lib/py4j-0.8.1.zip
2 changes: 1 addition & 1 deletion python/pyspark/__init__.py
@@ -40,7 +40,7 @@

import sys
import os
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))


from pyspark.conf import SparkConf