Skip to content

Commit

Permalink
Use dockerfile_parse to get Dockerfile info
Browse files Browse the repository at this point in the history
This is work towards tern-tools#522

We add initial functionality for parsing dockerfiles using
dockerfile_parse. We also add some tests for the functions therein
and some extra functions to parse various pieces of the Dockerfile
we will need.

Most of the work is in tern/analyze/docker/dockerfile.py.
We add a class called Dockerfile which will contain the information
parsed using the function get_dockerfile_obj. The typical workflow
is to create a Dockerfile object using get_dockerfile_obj from an
existing Dockerfile file. Then we can use the other functions to
return the information we want:

- replace_env will do a key-value replacement of any piece of the
Dockerfile object's structure property with any key-value dict. The
typical use for this is to replace ENVs with their values in any
Dockerfile line.
- expand_vars will do the replacement wholesale for the Dockerfile
content.
- parse_from_image will get a dictionary containing tokens in the
image string for each FROM line in the Dockerfile. In order for
this to work, we also add a function called parse_image_string
to tern/utils/general.py which will do the parsing of the image
string. This helps us use this parsing for image names passed via
command line using the -i flag.
- Add tests for these functions in test_analyze_docker_dockerfile.py.
For these tests, we also add some dockerfiles.

Other changes include:
- Add dockerfile-parse to the list of requirements.
- Add tests for dockerfile and general to the CI tests.

Signed-off-by: Nisha K <[email protected]>
  • Loading branch information
Nisha K committed Feb 12, 2020
1 parent d18c4e1 commit 6f09399
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 11 deletions.
7 changes: 4 additions & 3 deletions ci/test_files_touched.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 VMware, Inc. All Rights Reserved.
# Copyright (c) 2019-2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

from git import Repo
Expand Down Expand Up @@ -72,6 +72,7 @@
'tern -l report -i centos:7'],
# tern/analyze/docker
re.compile('tern/analyze/docker'): [
'python tests/test_analyze_docker_dockerfile.py',
'tern -l report -i golang:alpine',
'tern -l report -d samples/alpine_python/Dockerfile'],
# tern/report
Expand All @@ -86,6 +87,7 @@
['tern -l report -i golang:alpine'],
# tern/utils
re.compile('tern/utils'): [
'python tests/test_util_general.py',
'tern -l report -i golang:alpine',
'tern -l report -d samples/alpine_python/Dockerfile'],
# tests
Expand All @@ -106,8 +108,7 @@
re.compile('tests/test_class_package.py'):
['python tests/test_class_package.py'],
re.compile('tests/test_class_template.py'):
['python tests/test_class_template.py']
}
['python tests/test_class_template.py']}

alltests = []
for change in changes:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

PyYAML>=5.2
docker~=4.1
dockerfile-parse~=0.0
requests~=2.22
stevedore>=1.31
pbr>=5.4
91 changes: 87 additions & 4 deletions tern/analyze/docker/dockerfile.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-2019 VMware, Inc. All Rights Reserved.
# Copyright (c) 2017-2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

"""
Dockerfile parser and information retrieval
Dockerfile information retrieval and modification
"""

from dockerfile_parse import DockerfileParser
import re

from tern.utils.general import clean_command
from tern.utils import general

directives = ['FROM',
'ARG',
Expand All @@ -35,6 +36,88 @@
tag_separator = ':'


class Dockerfile():
    '''This class is used as a wrapper to store dockerfile information
    retrieved from the parser.

    All attributes start out empty and are meant to be filled in by
    get_dockerfile_obj:
    structure: the parser's structure property (a list with one dictionary
    per Dockerfile instruction)
    envs: the parser's envs property (ENV key-value pairs)
    prev_env: the environment variables handed to the parser as the
    parent environment
    filepath: the path to the Dockerfile that was parsed
    parent_images: the parser's parent_images property (one image string
    per FROM line)'''
    def __init__(self):
        self.structure = None
        self.envs = None
        self.prev_env = None
        self.filepath = ""
        self.parent_images = []

    def is_none(self):
        """Check if the object is empty.

        Returns True only when none of the attributes hold any data.
        parent_images is included in the check so that an object which
        carries only FROM images is not reported as empty."""
        return not (self.structure or
                    self.envs or
                    self.prev_env or
                    self.filepath or
                    self.parent_images)


def get_dockerfile_obj(dockerfile_name, prev_env=None):
    '''Parse a Dockerfile and return the result as a Dockerfile object.
    dockerfile_name: the path to the Dockerfile, including the file name
    prev_env: environment variables that may have been set in earlier
    stages of a multistage docker build, given as a python dictionary of
    the form {'ENV': 'value',...}
    Returns a Dockerfile object holding the file path, the parsed
    structure, the ENVs, prev_env and the parent images.'''
    result = Dockerfile()
    result.filepath = dockerfile_name
    result.prev_env = prev_env
    with open(dockerfile_name) as dockerfile:
        parsed = DockerfileParser(parent_env=prev_env, fileobj=dockerfile)
        # NOTE(review): the parser properties are read while the file is
        # still open; presumably they read from the file object on access,
        # so keep these lines inside the with block - confirm against the
        # dockerfile_parse documentation before moving them.
        result.structure = parsed.structure
        result.envs = parsed.envs
        result.parent_images = parsed.parent_images
    return result


def replace_env(key_value_dict, df_structure_dict):
    '''Replace references to the variables named in key_value_dict with
    their values inside one entry of the dockerfile object's structure.
    Both the $KEY and the ${KEY} forms are replaced, in the 'content' and
    the 'value' fields. The entry is modified in place; nothing is returned.
    key_value_dict: a dictionary of key-value pairs like envs in the
    dockerfile object
    df_structure_dict: a dictionary from the dockerfile object's structure.
    It must have the 'content' and 'value' keys'''
    for key, val in key_value_dict.items():
        # A bare $KEY only counts as a reference when it is not followed by
        # another identifier character. Otherwise a key such as GO would
        # clobber the front of $GOPATH.
        bare_ref = re.compile(r'\$' + re.escape(key) + r'(?![A-Za-z0-9_])')
        braced_ref = '${' + key + '}'
        for field in ('content', 'value'):
            # A function is used as the replacement so that backslashes in
            # the value are inserted literally.
            text = bare_ref.sub(lambda _match, v=val: v,
                                df_structure_dict[field])
            df_structure_dict[field] = text.replace(braced_ref, val)


def expand_vars(dfobj):
    '''Substitute known environment variables throughout the Dockerfile
    object's structure. The object's own envs are applied first, followed
    by prev_env. The structure entries are modified in place.
    dfobj: the Dockerfile object created using get_dockerfile_obj'''
    for env_map in (dfobj.envs, dfobj.prev_env):
        # nothing to substitute when a mapping is missing or empty
        if not env_map:
            continue
        for entry in dfobj.structure:
            replace_env(env_map, entry)


def parse_from_image(dfobj):
    '''Return one dictionary per entry in the Dockerfile object's
    parent_images, in the same order. Each dictionary is whatever
    general.parse_image_string returns for that image string, which is
    of the form:
        {'name': <image name used (either from dockerhub or full name)>,
         'tag': <image tag>,
         'digest_type': <the hashing algorithm used>,
         'digest': <image digest>}
    dfobj: the Dockerfile object created using get_dockerfile_obj'''
    return [general.parse_image_string(image_ref)
            for image_ref in dfobj.parent_images]


def get_command_list(dockerfile_name):
'''Given a Dockerfile, return a list of Docker commands'''
with open(dockerfile_name) as f:
Expand Down Expand Up @@ -80,7 +163,7 @@ def get_directive_list(command_list):
i.e. FROM, ADD, COPY etc and the object to be acted upon'''
directive_list = []
for command in command_list:
directive_list.append(get_directive(clean_command(command)))
directive_list.append(get_directive(general.clean_command(command)))
return directive_list


Expand Down
30 changes: 30 additions & 0 deletions tern/utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,33 @@ def check_image_string(image_str: str):
if re.match(tag_format, image_str) or re.match(digest_format, image_str):
return True
return False


def parse_image_string(image_string):
    '''From the image string used to reference an image, return a dictionary
    of the form:
        {'name': <image name used (either from dockerhub or full name)>,
         'tag': <image tag>,
         'digest_type': <the hashing algorithm used>,
         'digest': <image digest>}
    per Docker's convention, an image can be referenced as
    image OR image:tag OR image@hash:digest OR image:tag@hash:digest
    Currently OCI also uses this convention
    Parts that are not present come back as empty strings.
    The name may carry a registry host with a port (for example
    localhost:5000/debian:jessie), so only a ':' that appears after the
    last '/' is treated as the tag separator.
    Returns None if the digest part is malformed, i.e. it is not of the
    form hash:digest'''
    # Everything after '@' is the digest. Split it off first so that the
    # ':' inside it is never confused with the tag separator.
    reference, at_sign, digest_part = image_string.partition('@')
    digest_type = ''
    digest = ''
    if at_sign:
        digest_type, colon, digest = digest_part.partition(':')
        if not colon or ':' in digest or '@' in digest:
            return None
    name = reference
    tag = ''
    colon_index = reference.rfind(':')
    # A ':' before the last '/' belongs to a registry port, not to a tag.
    if colon_index > reference.rfind('/'):
        name = reference[:colon_index]
        tag = reference[colon_index + 1:]
    return {'name': name,
            'tag': tag,
            'digest_type': digest_type,
            'digest': digest}
18 changes: 18 additions & 0 deletions tests/dockerfiles/buildpack_deps_jessie_curl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
FROM debian:jessie

RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
netbase \
wget \
&& rm -rf /var/lib/apt/lists/*

RUN set -ex; \
if ! command -v gpg > /dev/null; then \
apt-get update; \
apt-get install -y --no-install-recommends \
gnupg \
dirmngr \
; \
rm -rf /var/lib/apt/lists/*; \
fi
1 change: 1 addition & 0 deletions tests/dockerfiles/buildpack_deps_jessie_pinned
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FROM debian@sha256:e25703ee6ab5b2fac31510323d959cdae31eebdf48e88891c549e55b25ad7e94
50 changes: 50 additions & 0 deletions tests/dockerfiles/golang_1.13_stretch
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
FROM buildpack-deps:stretch-scm

# gcc for cgo
RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \
gcc \
libc6-dev \
make \
pkg-config \
&& rm -rf /var/lib/apt/lists/*

ENV GOLANG_VERSION 1.13.6

RUN set -eux; \
\
# this "case" statement is generated via "update.sh"
dpkgArch="$(dpkg --print-architecture)"; \
case "${dpkgArch##*-}" in \
amd64) goRelArch='linux-amd64'; goRelSha256='a1bc06deb070155c4f67c579f896a45eeda5a8fa54f35ba233304074c4abbbbd' ;; \
armhf) goRelArch='linux-armv6l'; goRelSha256='37a1a83e363dcf146a67fa839d170fd1afb13009585fdd493d0a3370fbe6f785' ;; \
arm64) goRelArch='linux-arm64'; goRelSha256='0a18125c4ed80f9c3045cf92384670907c4796b43ed63c4307210fe93e5bbca5' ;; \
i386) goRelArch='linux-386'; goRelSha256='27feb013106da784f09e560720aa41ab395c67f7eed4c4a0fce04bc6e3d01c7d' ;; \
ppc64el) goRelArch='linux-ppc64le'; goRelSha256='26a977a8af5dc50a562f0a57b58dded5fa3bacfe77722cf8a84ea54ca54728dd' ;; \
s390x) goRelArch='linux-s390x'; goRelSha256='5cd9900a1fa0f0cac657930b648381cad9b8c5e2bbc77caf86a6fb5cedad0017' ;; \
*) goRelArch='src'; goRelSha256='aae5be954bdc40bcf8006eb77e8d8a5dde412722bc8effcdaf9772620d06420c'; \
echo >&2; echo >&2 "warning: current architecture ($dpkgArch) does not have a corresponding Go binary release; will be building from source"; echo >&2 ;; \
esac; \
\
url="https://golang.org/dl/go${GOLANG_VERSION}.${goRelArch}.tar.gz"; \
wget -O go.tgz "$url"; \
echo "${goRelSha256} *go.tgz" | sha256sum -c -; \
tar -C /usr/local -xzf go.tgz; \
rm go.tgz; \
\
if [ "$goRelArch" = 'src' ]; then \
echo >&2; \
echo >&2 'error: UNIMPLEMENTED'; \
echo >&2 'TODO install golang-any from jessie-backports for GOROOT_BOOTSTRAP (and uninstall after build)'; \
echo >&2; \
exit 1; \
fi; \
\
export PATH="/usr/local/go/bin:$PATH"; \
go version

ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH

RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH"
WORKDIR $GOPATH
107 changes: 107 additions & 0 deletions tests/test_analyze_docker_dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

import unittest

from tern.analyze.docker import dockerfile


class TestAnalyzeDockerDockerfile(unittest.TestCase):
    '''Tests for the functions in tern/analyze/docker/dockerfile.py, run
    against the sample Dockerfiles under tests/dockerfiles. The paths are
    relative, so the tests are expected to be run from the repository
    root.'''

    def setUp(self):
        self.buildpack = 'tests/dockerfiles/buildpack_deps_jessie_curl'
        self.buildpackpinned = 'tests/dockerfiles/buildpack_deps_jessie_pinned'
        self.golang = 'tests/dockerfiles/golang_1.13_stretch'

    def tearDown(self):
        # remove every fixture created in setUp
        del self.buildpack
        del self.buildpackpinned
        del self.golang

    def testDockerfileObject(self):
        '''A freshly created Dockerfile object is empty'''
        dfobj = dockerfile.Dockerfile()
        self.assertTrue(dfobj.is_none())

    def testDockerfileParserWithoutEnv(self):
        '''Parsing without previous envs fills in the parent images and
        the structure, and leaves envs empty'''
        dfobj = dockerfile.get_dockerfile_obj(self.buildpack)
        self.assertFalse(dfobj.is_none())
        self.assertEqual(dfobj.parent_images, ['debian:jessie'])
        structure = [{'instruction': 'FROM',
                      'startline': 0,
                      'endline': 0,
                      'content': 'FROM debian:jessie\n',
                      'value': 'debian:jessie'},
                     {'instruction': 'RUN',
                      'startline': 2,
                      'endline': 7,
                      'content': ('RUN apt-get update && apt-get install -y --'
                                  'no-install-recommends \\\n\t\tca-certific'
                                  'ates \\\n\t\tcurl \\\n\t\tnetbase \\\n\t\tw'
                                  'get \\\n\t&& rm -rf /var/lib/apt/lists/*'
                                  '\n'),
                      'value': ('apt-get update && apt-get install -y --no-in'
                                'stall-recommends \t\tca-certificates \t\tcur'
                                'l \t\tnetbase \t\twget \t&& rm -rf /var/lib/'
                                'apt/lists/*')},
                     {'instruction': 'RUN',
                      'startline': 9,
                      'endline': 17,
                      'content': ('RUN set -ex; \\\n\tif ! command -v gpg > /'
                                  'dev/null; then \\\n\t\tapt-get update; \\'
                                  '\n\t\tapt-get install -y --no-install-reco'
                                  'mmends \\\n\t\t\tgnupg \\\n\t\t\tdirmngr \\'
                                  '\n\t\t; \\\n\t\trm -rf /var/lib/apt/lists/'
                                  '*; \\\n\tfi\n'),
                      'value': ('set -ex; \tif ! command -v gpg > /dev/null; t'
                                'hen \t\tapt-get update; \t\tapt-get install -'
                                'y --no-install-recommends \t\t\tgnupg \t\t\td'
                                'irmngr \t\t; \t\trm -rf /var/lib/apt/lists/*'
                                '; \tfi')}]
        self.assertEqual(dfobj.structure, structure)
        self.assertFalse(dfobj.envs)

    def testDockerfileParserWithEnv(self):
        '''Previous envs passed to get_dockerfile_obj are stored on the
        returned object'''
        dfobj = dockerfile.get_dockerfile_obj(self.buildpack,
                                              {'buildno': '123abc'})
        self.assertFalse(dfobj.is_none())
        self.assertEqual(dfobj.prev_env, {'buildno': '123abc'})

    def testReplaceEnv(self):
        '''replace_env substitutes both $VAR and ${VAR} references and
        leaves unknown variables such as ${goRelArch} alone'''
        dfobj = dockerfile.get_dockerfile_obj(self.golang)
        envs = {'GOLANG_VERSION': '1.13.6',
                'GOPATH': '/go',
                'PATH': '/go/bin:/usr/local/go/bin:'}
        self.assertEqual(dfobj.envs, envs)
        struct = dfobj.structure[9]
        dockerfile.replace_env(dfobj.envs, struct)
        self.assertEqual(struct['content'], 'WORKDIR /go\n')
        self.assertEqual(struct['value'], '/go')
        replace_content = ('\n\turl="https://golang.org/dl/go1.13.6.'
                           '${goRelArch}.tar.gz"; ')
        replace_value = (' \t\turl="https://golang.org/dl/go1.13.6'
                         '.${goRelArch}.tar.gz"')
        struct = dfobj.structure[5]
        dockerfile.replace_env(dfobj.envs, struct)
        self.assertEqual(struct['content'].split('\\')[14], replace_content)
        self.assertEqual(struct['value'].split(';')[28], replace_value)

    def testParseFromImage(self):
        '''parse_from_image handles a tagged image and an image pinned
        by digest'''
        dfobj = dockerfile.get_dockerfile_obj(self.buildpack)
        image_list = dockerfile.parse_from_image(dfobj)
        self.assertEqual(image_list, [{'name': 'debian',
                                       'tag': 'jessie',
                                       'digest_type': '',
                                       'digest': ''}])
        dfobj = dockerfile.get_dockerfile_obj(self.buildpackpinned)
        image_list = dockerfile.parse_from_image(dfobj)
        debian_digest = ('e25703ee6ab5b2fac31510323d959cdae31eebdf48e88891c54'
                         '9e55b25ad7e94')
        self.assertEqual(image_list, [{'name': 'debian',
                                       'tag': '',
                                       'digest_type': 'sha256',
                                       'digest': debian_digest}])


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 6f09399

Please sign in to comment.