Add Python docs about SparkConf
mateiz committed Dec 29, 2013
1 parent 615fb64 commit 58c6fa2
Showing 2 changed files with 44 additions and 1 deletion.
42 changes: 42 additions & 0 deletions python/pyspark/conf.py
@@ -50,29 +50,62 @@


class SparkConf(object):
    """
    Configuration for a Spark application. Used to set various Spark
    parameters as key-value pairs.

    Most of the time, you would create a SparkConf object with
    C{SparkConf()}, which will load values from `spark.*` Java system
    properties and any `spark.conf` on your application's classpath.
    In this case, system properties take priority over `spark.conf`,
    and any parameters you set directly on the `SparkConf` object take
    priority over both of those.

    For unit tests, you can also call C{SparkConf(False)} to skip
    loading external settings and get the same configuration no matter
    what is on the classpath.

    All setter methods in this class support chaining. For example,
    you can write C{conf.setMaster("local").setAppName("My app")}.
    """

    def __init__(self, loadDefaults=True, _jvm=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
               properties and classpath (true by default)
        """
        from pyspark.context import SparkContext
        SparkContext._ensure_initialized()
        _jvm = _jvm or SparkContext._jvm
        self._jconf = _jvm.SparkConf(loadDefaults)

    def set(self, key, value):
        """Set a configuration property."""
        self._jconf.set(key, value)
        return self

    def setMaster(self, value):
        """Set master URL to connect to."""
        self._jconf.setMaster(value)
        return self

    def setAppName(self, value):
        """Set application name."""
        self._jconf.setAppName(value)
        return self

    def setSparkHome(self, value):
        """
        Set path where Spark is installed on worker nodes (needed for some
        deployment modes).
        """
        self._jconf.setSparkHome(value)
        return self

    def setExecutorEnv(self, key=None, value=None, pairs=None):
        """Set an environment variable to be passed to executors."""
        if (key is not None and pairs is not None) or (key is None and pairs is None):
            raise Exception("Either pass one key-value pair or a list of pairs")
        elif key is not None:
@@ -83,23 +116,32 @@ def setExecutorEnv(self, key=None, value=None, pairs=None):
        return self

    def setAll(self, pairs):
        """
        Set multiple parameters, passed as a list of key-value pairs.

        @param pairs: list of key-value pairs to set
        """
        for (k, v) in pairs:
            self._jconf.set(k, v)
        return self

    def get(self, key):
        """Get the configured value for some key, if set."""
        return self._jconf.get(key)

    def getOrElse(self, key, defaultValue):
        """Get the value for some key, or return a default otherwise."""
        return self._jconf.getOrElse(key, defaultValue)

    def getAll(self):
        """Get all values as a list of key-value pairs."""
        pairs = []
        for elem in self._jconf.getAll():
            pairs.append((elem._1(), elem._2()))
        return pairs

    def contains(self, key):
        """Does this configuration contain a given key?"""
        return self._jconf.contains(key)


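To tie the documented API together, here is a minimal usage sketch (illustrative only, not part of the commit; it assumes a working local Spark installation): a typical application builds its configuration with the chained setters and then hands it to a SparkContext.

from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# Build a configuration with chained setters, as described in the class docstring.
conf = (SparkConf()
        .setMaster("local")
        .setAppName("My app")
        .set("spark.executor.memory", "1g"))

# Inspect the resulting settings.
print(conf.get("spark.executor.memory"))      # '1g'
print(conf.contains("spark.some.other.key"))  # False for a key that was never set
print(conf.getAll())                          # list of (key, value) pairs

# For unit tests, SparkConf(False) skips loading external settings.

# Hand the configuration to a SparkContext.
sc = SparkContext(conf=conf)
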
3 changes: 2 additions & 1 deletion python/pyspark/context.py
@@ -53,7 +53,8 @@ class SparkContext(object):
    def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
                 environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None):
        """
-       Create a new SparkContext.
+       Create a new SparkContext. At least the master and app name should be set,
+       either through the named parameters here or through C{conf}.
        @param master: Cluster URL to connect to
               (e.g. mesos://host:port, spark://host:port, local[4]).
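As a quick illustration of the revised docstring (again a sketch, not part of the commit), the master and app name can be supplied either as named parameters or through a SparkConf:

from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# Option 1: pass the master URL and app name as named parameters.
sc = SparkContext(master="local[4]", appName="My app")

# Option 2: carry the same settings in a SparkConf and pass that instead.
# (In a real program you would create only one SparkContext.)
conf = SparkConf().setMaster("local[4]").setAppName("My app")
sc = SparkContext(conf=conf)
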
