Skip to content

Commit

Permalink
[FEATURE][databrickslabs#123] add support for jinja2 deployment files (
Browse files Browse the repository at this point in the history
…databrickslabs#124)

[FEATURE][databrickslabs#123] add support for jinja2 deployment files (databrickslabs#124)
  • Loading branch information
elenamartina authored Feb 17, 2022
1 parent af9fe07 commit 36a6cd9
Show file tree
Hide file tree
Showing 20 changed files with 628 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Documentation, examples and support for Jobs API 2.1
- Support for Jinja2-based templates inside deployment configuration

### Fixed

Expand Down
32 changes: 32 additions & 0 deletions dbx/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import click
import emoji
import git
import jinja2
import mlflow
import mlflow.entities
import pkg_resources
Expand Down Expand Up @@ -174,12 +175,43 @@ def get_all_environment_names(self) -> Any:
return list(self.resolve_env_vars(read_json(self._path)).keys())


class Jinja2DeploymentConfig(AbstractDeploymentConfig):
    """Deployment config backed by a Jinja2 template that renders to JSON or YAML.

    The template is rendered with the process environment variables as its
    context; the rendered text is then parsed according to the format the
    template wraps (for example ``deployment.yml.j2`` renders to YAML).
    """

    def __init__(self, path, ext):
        """Remember the template location and the format it renders to.

        :param path: path to the ``.j2`` template file.
        :param ext: extension of the rendered format: ``json``, ``yml`` or ``yaml``.
        """
        super().__init__(path)
        self._ext = ext

    def _render_jinja_template(self) -> str:
        """Render the template file and return the resulting text."""
        # os.path.split honours the platform's path separators and keeps a
        # root directory intact ("/x.j2" -> "/"), which a manual split on "/"
        # does not.
        directory, file_name = os.path.split(self._path)

        j2_env = jinja2.Environment(loader=jinja2.FileSystemLoader(directory))
        # Environment variables are exposed to the template as plain variables.
        return j2_env.get_template(file_name).render(os.environ)

    def _get_deployment_config(self) -> Dict[str, Any]:
        """Render the template and parse it into the environments mapping.

        For JSON the whole parsed document is returned; for YAML the value of
        the top-level ``environments`` key is returned.

        :raises Exception: if the wrapped format is neither JSON nor YAML.
        """
        # Fail fast with a clear message instead of implicitly returning None,
        # which would only surface later as an AttributeError in the callers.
        if self._ext not in ("json", "yml", "yaml"):
            raise Exception(f"Undefined config file handler for extension: {self._ext}")

        rendered = self._render_jinja_template()
        if self._ext == "json":
            return json.loads(rendered)

        yaml = ruamel.yaml.YAML(typ="safe")
        return yaml.load(rendered).get("environments")

    def get_environment(self, environment: str) -> Any:
        """Return the configuration of *environment*, or ``None`` if it is not defined."""
        return self._get_deployment_config().get(environment)

    def get_all_environment_names(self) -> Any:
        """Return the names of all environments defined in the rendered config."""
        return list(self._get_deployment_config().keys())


def get_deployment_config(path: str) -> AbstractDeploymentConfig:
    """Pick the deployment config handler matching the file extension of *path*.

    ``.json`` and ``.yml``/``.yaml`` files are handled directly. A ``.j2`` file
    is treated as a Jinja2 template whose preceding extension names the format
    it renders to, for example ``deployment.yml.j2``.

    :param path: path to the deployment file.
    :return: handler instance for the file.
    :raises Exception: if the extension (or, for ``.j2`` templates, the inner
        extension) is not supported.
    """
    parts = path.split(".")
    ext = parts[-1]
    if ext == "json":
        return JsonDeploymentConfig(path)
    if ext in ["yml", "yaml"]:
        return YamlDeploymentConfig(path)
    if ext == "j2":
        # A template has to name its target format ("deployment.json.j2").
        # Reject anything else here rather than failing later while parsing,
        # and avoid an IndexError when the path contains no inner extension.
        inner_ext = parts[-2] if len(parts) > 1 else ""
        if inner_ext not in ("json", "yml", "yaml"):
            raise Exception(
                f"Jinja2 deployment file must end with .json.j2, .yml.j2 or .yaml.j2, got: {path}"
            )
        return Jinja2DeploymentConfig(path, inner_ext)
    raise Exception(f"Undefined config file handler for extension: {ext}")

Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Follow the :ref:`quickstart` to install the package and create the first sample
run_submit
properties_propagation
multitask_jobs
jinja2_support
path_adjustment
environment_variables
named_properties
Expand Down
21 changes: 21 additions & 0 deletions docs/source/jinja2_support.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Jinja2 Support: Environment variables, logic and loops
=============================================================

Since version 0.4.0 :code:`dbx` supports `Jinja2 <https://jinja.palletsprojects.com/en/3.0.x/api/>`_ rendering for JSON and YAML based configurations.
This allows you to use environment variables in the deployment, add variable-based conditions, `Jinja filters <https://jinja.palletsprojects.com/en/3.0.x/templates/#filters>`_ and for loops to make your deployment more flexible for CI pipelines.

To add Jinja2 support to your deployment file, add the suffix :code:`.j2` to its name, for example :code:`deployment.yml.j2`.

Please find examples on how to use Jinja2 templates below:

.. tabs::

.. tab:: deployment.json.j2

.. literalinclude:: ../../tests/deployment-configs/jinja-example.json.j2
:language: jinja

.. tab:: deployment.yml.j2

.. literalinclude:: ../../tests/deployment-configs/jinja-example.yaml.j2
:language: yaml+jinja
2 changes: 1 addition & 1 deletion docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ You can define re-usable definitions in yaml. Here is an example yaml and its js

.. tab:: JSON Equivalent

.. literalinclude:: ../../tests/deployment-configs/02-yaml-with-vars-test.json
.. literalinclude:: ../../tests/deployment-configs/02-json-with-vars-test.json
:language: JSON


Expand Down
File renamed without changes.
15 changes: 15 additions & 0 deletions tests/deployment-configs/01-jinja-test.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
environments:
default:
jobs:
- name: "your-job-name"
new_cluster:
spark_version: "7.3.x-cpu-ml-scala2.12"
node_type_id: "some-node-type"
aws_attributes:
first_on_demand: 0
availability: "SPOT"
num_workers: 2
libraries: []
max_retries: 0
spark_python_task:
python_file: "tests/deployment-configs/placeholder_1.py"
23 changes: 23 additions & 0 deletions tests/deployment-configs/01-json-test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"default": {
"jobs": [
{
"name": "your-job-name",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "SPOT"
},
"num_workers": 2
},
"libraries": [],
"max_retries": 0,
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_1.py"
}
}
]
}
}
89 changes: 89 additions & 0 deletions tests/deployment-configs/02-jinja-with-vars-test.json.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"default": {
"jobs": [
{
"name": "your-job-name",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "SPOT"
},
"num_workers": 2
},
"libraries": [],
"max_retries": 0,
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_1.py"
}
},
{
"name": "your-job-name-2",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "SPOT"
},
"num_workers": 2
},
"libraries": [],
"max_retries": 0,
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_2.py"
}
},
{
"name": "your-job-name-3",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "SPOT"
},
"num_workers": 2
},
"libraries": [
{
"pypi": {
"package": "pydash"
}
}
],
"max_retries": 5,
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_2.py"
}
},
{
"name": "your-job-name-4",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "SPOT"
},
"autoscale": {
"min_workers": 2,
"max_workers": 5
}
},
"libraries": [
{
"pypi": {
"package": "pydash"
}
}
],
"max_retries": 5,
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_2.py"
}
}
]
}
}
64 changes: 64 additions & 0 deletions tests/deployment-configs/02-jinja-with-vars-test.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# http://yaml.org/spec/1.2/spec.html
# https://learnxinyminutes.com/docs/yaml/

custom:
basic-cluster-props: &basic-cluster-props
spark_version: "7.3.x-cpu-ml-scala2.12"
node_type_id: "some-node-type"
aws_attributes:
first_on_demand: 0
availability: "SPOT"
basic-auto-scale-props: &basic-auto-scale-props
autoscale:
min_workers: 2
max_workers: 5

basic-static-cluster: &basic-static-cluster
new_cluster:
<<: *basic-cluster-props
num_workers: 2

basic-autoscale-cluster: &basic-autoscale-cluster
new_cluster:
<<: # merge these two maps and place them here.
- *basic-cluster-props
- *basic-auto-scale-props

basic-cluster-libraries: &basic-cluster-libraries
libraries:
- pypi:
package: "pydash"


environments:
default:
jobs:
- name: "your-job-name"
<<: *basic-static-cluster
libraries: []
max_retries: 0
spark_python_task:
python_file: "tests/deployment-configs/placeholder_1.py"

- name: "your-job-name-2"
<<: *basic-static-cluster
libraries: []
max_retries: 0
spark_python_task:
python_file: "tests/deployment-configs/placeholder_2.py"

- name: "your-job-name-3"
<<:
- *basic-static-cluster
- *basic-cluster-libraries
max_retries: 5
spark_python_task:
python_file: "tests/deployment-configs/placeholder_2.py"

- name: "your-job-name-4"
<<:
- *basic-autoscale-cluster
- *basic-cluster-libraries
max_retries: 5
spark_python_task:
python_file: "tests/deployment-configs/placeholder_2.py"
42 changes: 42 additions & 0 deletions tests/deployment-configs/03-multitask-job.json.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"default": {
"jobs": [
{
"name": "multitask-job-name",
"tasks": [
{
"task_key": "first-task",
"description": "some description",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"num_workers": 2
},
"max_retries": 0,
"spark_python_task": {
"python_file": "placeholder_1.py"
}
},
{
"task_key": "second",
"description": "some description",
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"num_workers": 2
},
"max_retries": 0,
"spark_python_task": {
"python_file": "placeholder_1.py"
},
"depends_on": [
{
"task_key": "first-task"
}
]
}
]
}
]
}
}
28 changes: 28 additions & 0 deletions tests/deployment-configs/03-multitask-job.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# http://yaml.org/spec/1.2/spec.html
# https://learnxinyminutes.com/docs/yaml/

custom:
basic-cluster-props: &basic-cluster-props
spark_version: "7.3.x-cpu-ml-scala2.12"
node_type_id: "some-node-type"

basic-static-cluster: &basic-static-cluster
new_cluster:
<<: *basic-cluster-props
num_workers: 2

environments:
default:
jobs:
- name: "your-job-name"
tasks:
- task_key: "first-task"
<<: *basic-static-cluster
spark_python_task:
python_file: "./placeholder_1.py"
- task_key: "second-task"
<<: *basic-static-cluster
spark_python_task:
python_file: "./placeholder_2.py"
depends_on:
- task_key: "first-task"
29 changes: 29 additions & 0 deletions tests/deployment-configs/04-jinja-with-env-vars.json.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"default": {
"jobs": [
{
"name": "your-job-name",
"timeout_seconds": "{{ TIMEOUT }}",
"email_notifications": {
"on_failure": [
"{{ ALERT_EMAIL | lower }}",
"[email protected]"
]
},
"new_cluster": {
"spark_version": "7.3.x-cpu-ml-scala2.12",
"node_type_id": "some-node-type",
"aws_attributes": {
"first_on_demand": 0,
"availability": "{{ AVAILABILITY | default('SPOT') }}"
},
"num_workers": 2
},
"libraries": [],
"max_retries": "{{ MAX_RETRY | default(3) }}",
"spark_python_task": {
"python_file": "tests/deployment-configs/placeholder_1.py"
}
}]
}
}
Loading

0 comments on commit 36a6cd9

Please sign in to comment.