Skip to content

Commit

Permalink
Integrate scancode data
Browse files Browse the repository at this point in the history
These changes are work towards tern-tools#480

This commit adds a setter for file_type in the
FileData class and also integrates
the scancode data into Tern's data model.
  • Loading branch information
rnjudge authored Mar 6, 2020
2 parents a4c107f + 403645f commit c9ff72f
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 37 deletions.
33 changes: 27 additions & 6 deletions tern/analyze/passthrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


import logging
import os
import shutil
from stevedore import driver
from stevedore.exception import NoMatches
Expand All @@ -21,25 +22,45 @@
logger = logging.getLogger(constants.logger_name)


def get_filesystem_command(layer_obj, command):
'''Given an ImageLayer object and a command in the form of a string,
return the command in list form with the target directory of the layer.
This assumes that the layer tarball is untarred, which should have happened
during the loading of the Image object'''
cmd_list = command.split(' ')
def get_exec_command(command_string):
    '''Given a command as a string, find out if the command exists on the
    system. If it does exist, return a subprocess invokable command list
    where the command is the absolute path of the binary existing on the
    system.

    The string is tokenized with shell-like rules, so a quoted argument
    stays in one piece and repeated, leading or trailing whitespace does
    not turn into empty arguments.

    Raises OSError if the string contains no command or if the command is
    not found on the system. Raises ValueError if the string has
    unbalanced quotes'''
    # imported here so that the function carries its own dependency
    import shlex

    cmd_list = shlex.split(command_string)
    # an empty or whitespace-only string names no binary; report it the same
    # way as a binary that cannot be found
    run_bin = cmd_list.pop(0) if cmd_list else ''
    # we first find if the command exists on the system
    bin_path = shutil.which(run_bin) if run_bin else None
    if not bin_path:
        raise OSError("Command {} not found".format(run_bin))
    return [bin_path] + cmd_list


def get_filesystem_command(layer_obj, command):
    '''Build the command list used to run an external tool against the
    filesystem of an ImageLayer object. The command string is resolved with
    get_exec_command and the layer's untar directory becomes the final
    argument. This assumes that the layer tarball is untarred, which should
    have happened during the loading of the Image object'''
    # external tools usually expect the target directory as the last token
    # of their CLI, so the directory simply goes at the end of the list
    exec_cmd = get_exec_command(command)
    target_dir = rootfs.get_untar_dir(layer_obj.tar_file)
    return exec_cmd + [target_dir]


def get_file_command(layer_tar_file, layer_file, command):
    '''Build the command list used to run an external tool against a single
    file of a layer. layer_tar_file is the tar_file property of an
    ImageLayer object, layer_file is a FileData object from that layer (its
    path attribute is read) and command is the command string. The file's
    location inside the layer's untar directory becomes the final
    argument'''
    exec_cmd = get_exec_command(command)
    untar_dir = rootfs.get_untar_dir(layer_tar_file)
    # NOTE(review): os.path.join discards untar_dir when layer_file.path is
    # absolute - confirm that FileData paths are always relative
    return exec_cmd + [os.path.join(untar_dir, layer_file.path)]


def execute_external_command(layer_obj, command, is_sudo=False):
'''Given an ImageLayer object and a command in the form of a list, execute
the command and store the results in the ImageLayer object either as
Expand Down
4 changes: 4 additions & 0 deletions tern/classes/file_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def version(self):
def file_type(self):
return self.__file_type

@file_type.setter
def file_type(self, file_type):
    '''Store the given file type on this object'''
    self.__file_type = file_type

@property
def origins(self):
    '''Return the origins object kept on this object'''
    return self.__origins
Expand Down
115 changes: 84 additions & 31 deletions tern/extensions/scancode/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@

import json
import logging
import sys

from tern.analyze.passthrough import get_filesystem_command
from tern.analyze.passthrough import get_file_command
from tern.classes.notice import Notice
from tern.classes.file_data import FileData
from tern.extensions.executor import Executor
from tern.utils import constants
from tern.utils import rootfs
Expand All @@ -28,44 +29,96 @@
logger = logging.getLogger(constants.logger_name)


def run_on_image(image_obj, command):
'''Scancode errors out when it fails to scan any file it is given even
if it is successful with other files. Hence we cannot use the available
run_on_image function in the passthrough module. Instead we will check
if a json object was returned or not'''
if not command:
logger.error("No command to execute. No report will be generated")
return False
for layer in image_obj.layers:
layer.files_analyzed = True
full_cmd = get_filesystem_command(layer, command)
origin_layer = 'Layer: ' + layer.fs_hash[:10]
def analyze_layer(layer_obj):
    '''Run scancode against the directory of an image layer. Each scancode
    result of type 'file' is turned into a FileData object which is added to
    the layer object; scan errors reported for a file are attached to its
    FileData object as error Notices. When scancode gives no result, the
    error is logged and attached to the layer's origins instead'''
    # scancode is pointed at the whole layer directory
    scan_cmd = get_filesystem_command(
        layer_obj, 'scancode -ilpcu --quiet --json -')
    layer_origin = 'Layer: ' + layer_obj.fs_hash[:10]
    output, err_output = rootfs.shell_command(True, scan_cmd)
    if not output:
        logger.error(
            "No scancode results for this layer: %s", str(err_output))
        layer_obj.origins.add_notice_to_origins(
            layer_origin, Notice(str(err_output), 'error'))
        return
    for entry in json.loads(output)['files']:
        # only entries of type 'file' become FileData objects
        if entry['type'] != 'file':
            continue
        file_data = FileData(
            entry['name'], entry['path'], entry['date'], entry['file_type'])
        file_data.set_checksum('sha1', entry['sha1'])
        if entry['licenses']:
            file_data.licenses = [
                lic['short_name'] for lic in entry['licenses']]
        file_data.license_expressions = entry['license_expressions']
        if entry['copyrights']:
            file_data.copyrights = [
                found['value'] for found in entry['copyrights']]
        if entry['urls']:
            file_data.urls = [found['url'] for found in entry['urls']]
        file_data.packages = entry['packages']
        file_data.authors = entry['authors']
        # one error Notice per scan error; nothing happens when there are none
        for scan_error in entry['scan_errors'] or ():
            file_data.origins.add_notice_to_origins(
                'File: ' + file_data.path, Notice(scan_error, 'error'))
        layer_obj.add_file(file_data)


def analyze_file(layer_obj):
    '''Use scancode to analyze files Tern has already found in an image layer.
    For each file in the layer, run scancode on the file. We assume that we
    already have the file names, paths and checksums filled out.

    The scancode results are written into the existing FileData objects.
    If scancode gives no result for a file, the error is logged and attached
    to that file's origins as an error Notice. Scan errors reported by
    scancode are attached the same way'''
    # run scancode against each file
    command = 'scancode -ilpcu --quiet --json -'
    for fd in layer_obj.files:
        # get_file_command reads the path attribute of its second argument,
        # so it has to receive the FileData object and not the path string
        full_cmd = get_file_command(layer_obj.tar_file, fd, command)
        origin_file = 'File: ' + fd.path
        result, error = rootfs.shell_command(True, full_cmd)
        if not result:
            logger.error(
                "No scancode results for this file: %s", str(error))
            fd.origins.add_notice_to_origins(
                origin_file, Notice(str(error), 'error'))
        else:
            # Fill the results into the FileData object. Only the first
            # entry of the result list is read
            data = json.loads(result)['files'][0]
            fd.date = data['date']
            fd.file_type = data['file_type']
            if data['licenses']:
                fd.licenses = [
                    lic['short_name'] for lic in data['licenses']]
            fd.license_expressions = data['license_expressions']
            if data['copyrights']:
                fd.copyrights = [c['value'] for c in data['copyrights']]
            if data['urls']:
                fd.urls = [u['url'] for u in data['urls']]
            fd.packages = data['packages']
            fd.authors = data['authors']
            if data['scan_errors']:
                # for each scan error make a notice
                for err in data['scan_errors']:
                    fd.origins.add_notice_to_origins(
                        origin_file, Notice(err, 'error'))
            # add filedata object to layer
            # NOTE(review): fd already comes from layer_obj.files, so this
            # looks redundant - confirm how add_file treats a file the
            # layer already holds
            layer_obj.add_file(fd)


class Scancode(Executor):
    '''Execute scancode'''
    def execute(self, image_obj):
        '''Execution should be:
            scancode -ilpcu --quiet --json - /path/to/directory
        '''
        for layer in image_obj.layers:
            layer.files_analyzed = True
            # A layer that already has files processed gets scancode run
            # per file. If there was no file processing done, scancode
            # processes the layer and creates the files for you
            analyzer = analyze_file if layer.files else analyze_layer
            analyzer(layer)
2 changes: 2 additions & 0 deletions tests/test_class_file_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def testInstance(self):
file2 = FileData('file2',
'path/to/file2',
'12355')
file1.file_type = 'ELF'
self.assertEqual(file1.file_type, 'ELF')
file2 = FileData('file2',
'path/to/file2',
'2020-01-01',
Expand Down

0 comments on commit c9ff72f

Please sign in to comment.