Skip to content

Commit

Permalink
Removes pip download when installing from local packages (apache#13422)
Browse files Browse the repository at this point in the history
This PR improves building the production image from local packages,
in preparation for moving provider requirements out of setup.cfg.

Previously, the `pip download` step was executed in the CI scripts
in order to download all the packages that were needed. However,
this had two problems:

1) PIP download was executed outside of Dockerfile in CI scripts
   which means that any change to requirements there could not
   be executed in 'workflow_run' event - because main branch version
   of CI scripts is used there. We want to add extra requirements
   when installing airflow so in order to be able to change
   it, those requirements should be added in Dockerfile.
   This will be done in the follow-up apache#13409 PR.

2) Packages downloaded with PIP download have a "file" version
   rather than regular == version when you run pip freeze/check.
   This looks weird and while you can figure out the version
   from file name, when you `pip install` them, they look
   much more normal. The airflow package and provider package
   will still get the "file" form but this is ok because we are
   building those packages from sources and they are not yet
   available in PyPI.

Example:

  adal==1.2.5
  aiohttp==3.7.3
  alembic==1.4.3
  amqp==2.6.1
  apache-airflow @ file:///docker-context-files/apache_airflow-2.1.0.dev0-py3-none-any.whl
  apache-airflow-providers-amazon @ file:///docker-context-files/apache_airflow_providers_amazon-1.0.0-py3-none-any.whl
  apache-airflow-providers-celery @ file:///docker-context-files/apache_airflow_providers_celery-1.0.0-py3-none-any.whl
  ...

With this PR, we do not `pip download` all packages, but instead
we prepare airflow + providers packages as .whl files and
install them from there (all the dependencies are installed
from PyPI).
  • Loading branch information
potiuk authored Jan 2, 2021
1 parent c674f81 commit e436883
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 55 deletions.
39 changes: 31 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -247,31 +247,54 @@ ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}

WORKDIR /opt/airflow

# remove mysql from extras if client is not installed
# hadolint ignore=SC2086, SC2010
RUN if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
# Remove mysql from extras if client is not installed \
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
fi; \
if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
--upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
pip install --upgrade --upgrade-strategy only-if-needed \
--user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi; \
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
# We want to install apache airflow packages with constraints \
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --force-reinstall --upgrade --upgrade-strategy eager \
--user ${reinstalling_apache_airflow_packages}; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi ; \
# All the others we want to reinstall as-is, without dependencies \
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
grep -v apache_airflow | grep -v apache-airflow || true); \
if [[ "${reinstalling_other_packages}" != "" ]]; then \
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
fi; \
fi; \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy only-if-needed \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi; \
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
pip install --user --no-deps /docker-context-files/*.{whl,tar.gz}; \
fi ; \
fi; \
find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true

Expand Down
32 changes: 25 additions & 7 deletions Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
pip install \
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \
&& pip uninstall --yes apache-airflow; \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip uninstall --yes apache-airflow; \
fi

# Generate random hex dump file so that we can determine whether it's faster to rebuild the image
Expand Down Expand Up @@ -325,7 +325,8 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed; \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed\
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi
Expand All @@ -334,11 +335,28 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
# they are also installed additionally to whatever is installed from Airflow.
COPY docker-context-files/ /docker-context-files/

RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} != "true" ]]; then \
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
pip install --no-deps /docker-context-files/*.{whl,tar.gz}; \
# hadolint ignore=SC2086, SC2010
RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
# We want to install apache airflow packages with constraints \
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --force-reinstall --upgrade --upgrade-strategy eager \
--user ${reinstalling_apache_airflow_packages}; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi ; \
fi
# All the others we want to reinstall as-is, without dependencies \
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
grep -v apache_airflow | grep -v apache-airflow || true); \
if [[ "${reinstalling_other_packages}" != "" ]]; then \
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
fi; \
fi;

# Copy all the www/ files we need to compile assets. Done as two separate COPY
# commands so as otherwise it copies the _contents_ of static/ in to www/
Expand Down
25 changes: 21 additions & 4 deletions IMAGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -449,10 +449,27 @@ The following build arguments (``--build-arg`` in docker build command) can be u
| | | package. It has no effect when |
| | | installing from PyPI or GitHub repo. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow and its  |
| | | dependencies are installed from locally |
| | | downloaded .whl files placed in the |
| | | ``docker-context-files``. |
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow, providers and |
| | | all dependencies are installed from |
| | | locally built/downloaded      |
| | | .whl and .tar.gz files placed in the |
| | | ``docker-context-files``. In certain |
| | | corporate environments, this is required |
| | | to install airflow from such pre-vetted |
| | | packages rather than from PyPI. For this |
| | | to work, also set ``INSTALL_FROM_PYPI``. |
| | | Note that packages starting with |
| | | ``apache?airflow`` glob are treated |
| | | differently than other packages. All |
| | | ``apache?airflow`` packages are |
| | | installed with dependencies limited by |
| | | airflow constraints. All other packages |
| | | are installed without dependencies |
| | | 'as-is'. If you wish to install airflow |
| | | via 'pip download' with all dependencies |
| | | downloaded, you have to rename the |
| | | apache airflow and provider packages to |
| | | not start with ``apache?airflow`` glob. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``AIRFLOW_EXTRAS`` | ``all`` | extras to install |
+------------------------------------------+------------------------------------------+------------------------------------------+
Expand Down
27 changes: 25 additions & 2 deletions docs/apache-airflow/production-deployment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,19 @@ Preparing the constraint files and wheel files:
--constraint docker-context-files/constraints-2-0.txt \
apache-airflow[async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,ssh,statsd,virtualenv]==2.0.0
Since apache-airflow .whl packages are treated differently by the docker image, you need to rename the
downloaded apache-airflow* files, for example:

Building the image (after copying the downloaded files to the "docker-context-files" directory):
.. code-block:: bash
pushd docker-context-files
for file in apache?airflow*
do
mv ${file} _${file}
done
popd
Building the image:

.. code-block:: bash
Expand Down Expand Up @@ -539,7 +550,19 @@ The following build arguments (``--build-arg`` in docker build command) can be u
| | | corporate environments, this is required |
| | | to install airflow from such pre-vetted |
| | | packages rather than from PyPI. For this |
| | | to work, also set ``INSTALL_FROM_PYPI`` |
| | | to work, also set ``INSTALL_FROM_PYPI``. |
| | | Note that packages starting with |
| | | ``apache?airflow`` glob are treated |
| | | differently than other packages. All |
| | | ``apache?airflow`` packages are |
| | | installed with dependencies limited by |
| | | airflow constraints. All other packages |
| | | are installed without dependencies |
| | | 'as-is'. If you wish to install airflow |
| | | via 'pip download' with all dependencies |
| | | downloaded, you have to rename the |
| | | apache airflow and provider packages to |
| | | not start with ``apache?airflow`` glob. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``UPGRADE_TO_NEWER_DEPENDENCIES`` | ``false`` | If set to true, the dependencies are |
| | | upgraded to newer versions matching |
Expand Down
5 changes: 2 additions & 3 deletions scripts/ci/images/ci_build_dockerhub.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
# we need to run those in sub-processes
(
export INSTALL_FROM_PYPI="true"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
export INSTALL_PROVIDERS_FROM_SOURCES="true"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
export DOCKER_CACHE="pulled"
# shellcheck source=scripts/ci/libraries/_script_init.sh
Expand All @@ -86,7 +86,6 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
(
export INSTALL_FROM_PYPI="false"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
export INSTALL_PROVIDERS_FROM_SOURCES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
export DOCKER_CACHE="pulled"
# shellcheck source=scripts/ci/libraries/_script_init.sh
Expand All @@ -97,7 +96,7 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
rm -rf "${BUILD_CACHE_DIR}"
rm -rf "${AIRFLOW_SOURCES}/docker-context-files/*"
build_images::prepare_prod_build
build_images::build_prod_images_from_packages
build_images::build_prod_images_from_locally_built_airflow_packages
push_pull_remove_images::push_prod_images
)
else
Expand Down
5 changes: 2 additions & 3 deletions scripts/ci/images/ci_prepare_prod_image_on_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@

export INSTALL_FROM_PYPI="false"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
export INSTALL_PROVIDERS_FROM_SOURCES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
export DOCKER_CACHE="local"
export DOCKER_CACHE="pulled"
export VERBOSE="true"


Expand All @@ -41,7 +40,7 @@ function build_prod_images_on_ci() {
":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" "${AIRFLOW_PROD_IMAGE}"

else
build_images::build_prod_images_from_packages
build_images::build_prod_images_from_locally_built_airflow_packages
fi


Expand Down
33 changes: 21 additions & 12 deletions scripts/ci/libraries/_build_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -913,31 +913,40 @@ function build_images::determine_docker_cache_strategy() {
}


function build_images::build_prod_images_from_packages() {
# Asserts that the variable named by the first argument holds the expected value.
#
# Arguments:
#   $1 - name of the variable to check (read via indirect expansion)
#   $2 - value the variable is expected to have
#
# Prints an error message and terminates the calling shell with exit code 1
# when the values differ; returns normally otherwise.
function build_image::assert_variable() {
    local variable_name="${1}"
    local expected_value="${2}"
    # Default to an empty string so that an unset variable is reported through the
    # explicit error below instead of aborting with "unbound variable" under `set -u`.
    local variable_value="${!variable_name:-}"
    if [[ ${variable_value} != "${expected_value}" ]]; then
        echo
        echo "${COLOR_RED_ERROR}: Variable ${variable_name}: expected_value: '${expected_value}' but was '${variable_value}'!${COLOR_RESET}"
        echo
        exit 1
    fi
}

function build_images::build_prod_images_from_locally_built_airflow_packages() {
# We do not install from PyPI
build_image::assert_variable INSTALL_FROM_PYPI "false"
# But then we reinstall airflow and providers from prepared packages in the docker context files
build_image::assert_variable INSTALL_FROM_DOCKER_CONTEXT_FILES "true"
# But we install everything from scratch to make a "clean" installation in case any dependencies got removed
build_image::assert_variable AIRFLOW_PRE_CACHED_PIP_PACKAGES "false"

# Cleanup dist and docker-context-files folders
mkdir -pv "${AIRFLOW_SOURCES}/dist"
mkdir -pv "${AIRFLOW_SOURCES}/docker-context-files"
rm -f "${AIRFLOW_SOURCES}/dist/"*.{whl,tar.gz}
rm -f "${AIRFLOW_SOURCES}/docker-context-files/"*.{whl,tar.gz}

runs::run_pip_download

# Remove all downloaded apache airflow packages
rm -f "${AIRFLOW_SOURCES}/dist/"apache_airflow*.whl
rm -f "${AIRFLOW_SOURCES}/dist/"apache-airflow*.tar.gz

# Remove all downloaded apache airflow packages
mv -f "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"

# Build necessary provider packages
runs::run_prepare_provider_packages "${INSTALLED_PROVIDERS[@]}"

mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"

# Build apache airflow packages
build_airflow_packages::build_airflow_packages

mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"

build_images::build_prod_images_with_group
}

Expand Down
16 changes: 0 additions & 16 deletions scripts/ci/libraries/_runs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,6 @@ function runs::run_docs() {
start_end::group_end
}

# Downloads packages from PIP
function runs::run_pip_download() {
start_end::group_start "PIP download"
if [[ ${UPGRADE_TO_NEWER_DEPENDENCIES} ]]; then
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]'"
else
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]' --constraint
'https://raw.githubusercontent.com/apache/airflow/${DEFAULT_CONSTRAINTS_BRANCH}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt'"
fi
# Download all dependencies needed
docker run --rm --entrypoint /bin/bash \
"${EXTRA_DOCKER_FLAGS[@]}" \
"${AIRFLOW_CI_IMAGE}" -c "${pip_download_command}"
start_end::group_end
}

# Docker command to generate constraint files.
function runs::run_generate_constraints() {
start_end::group_start "Run generate constraints"
Expand Down

0 comments on commit e436883

Please sign in to comment.