Skip to content

Commit

Permalink
scanpypi: get license names from SPDX database
Browse files Browse the repository at this point in the history
Use the spdx_lookup package to compare the text of each package's license
files against the SPDX database.

This feature is optional: it is only used when spdx_lookup is installed.

Bonus: fix wrong indentation.

Signed-off-by: Yegor Yefremov <[email protected]>
Signed-off-by: Thomas Petazzoni <[email protected]>
  • Loading branch information
yegorich authored and tpetazzoni committed Jan 12, 2018
1 parent 3cd1908 commit d2ac1ec
Showing 1 changed file with 79 additions and 55 deletions.
134 changes: 79 additions & 55 deletions utils/scanpypi
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ import tempfile
import imp
from functools import wraps

try:
import spdx_lookup as liclookup
except ImportError:
# spdx_lookup is not installed
print('spdx_lookup module is not installed. This can lead to an '
'inaccurate licence detection. Please install it via\n'
'pip install spdx_lookup')
liclookup = None

def setup_decorator(func, method):
"""
Decorator for distutils.core.setup and setuptools.setup.
Expand Down Expand Up @@ -354,71 +363,86 @@ class BuildrootPackage():
lines.append(setup_type_line)
return lines

def __create_mk_license(self):
def __get_license_names(self, license_files):
"""
Create the lines referring to the package's license informations of the
<package_name>.mk file
Try to determine the related license name.
There are two possibilities. Either the script tries to
get the license name from the package's metadata or, if the spdx_lookup
package is available, the script compares the license files with
the SPDX database.
"""
license_line = ''
if liclookup is None:
license_dict = {
'Apache Software License': 'Apache-2.0',
'BSD License': 'BSD',
'European Union Public Licence 1.0': 'EUPL-1.0',
'European Union Public Licence 1.1': 'EUPL-1.1',
"GNU General Public License": "GPL",
"GNU General Public License v2": "GPL-2.0",
"GNU General Public License v2 or later": "GPL-2.0+",
"GNU General Public License v3": "GPL-3.0",
"GNU General Public License v3 or later": "GPL-3.0+",
"GNU Lesser General Public License v2": "LGPL-2.1",
"GNU Lesser General Public License v2 or later": "LGPL-2.1+",
"GNU Lesser General Public License v3": "LGPL-3.0",
"GNU Lesser General Public License v3 or later": "LGPL-3.0+",
"GNU Library or Lesser General Public License": "LGPL-2.0",
"ISC License": "ISC",
"MIT License": "MIT",
"Mozilla Public License 1.0": "MPL-1.0",
"Mozilla Public License 1.1": "MPL-1.1",
"Mozilla Public License 2.0": "MPL-2.0",
"Zope Public License": "ZPL"
}
regexp = re.compile('^License :* *.* *:+ (.*)( \(.*\))?$')
classifiers_licenses = [regexp.sub(r"\1", lic)
for lic in self.metadata['info']['classifiers']
if regexp.match(lic)]
licenses = map(lambda x: license_dict[x] if x in license_dict else x,
classifiers_licenses)
if not len(licenses):
print('WARNING: License has been set to "{license}". It is most'
' likely wrong, please change it if need be'.format(
license=', '.join(licenses)))
licenses = [self.metadata['info']['license']]
license_line = '{name}_LICENSE = {license}\n'.format(
name=self.mk_name,
license=', '.join(licenses))
else:
license_names = []
for license_file in license_files:
with open(license_file) as lic_file:
match = liclookup.match(lic_file.read())
if match.confidence >= 90.0:
license_names.append(match.license.id)

The license is found using the metadata from pypi.
In the metadata, the license can be found either with standard names in
the classifiers part or with naming from the packager in the "License"
part.
if len(license_names) > 0:
license_line = ('{name}_LICENSE ='
' {names}\n'.format(
name=self.mk_name,
names=', '.join(license_names)))

From the classifiers, the license is "translated" according to
buildroot standards if need be (i.e. from Apache Software License to
Apache-2.0).
return license_line

From the License part, we cannot guess what formatting the packager
used. Hence, it is likely to be incorrect. (i.e. Apache License 2.0
instead of Apache-2.0).
def __create_mk_license(self):
"""
Create the lines referring to the package's license information of the
<package_name>.mk file
The license's files are found by searching the package for files named
license or license.txt (case insensitive).
If more than one license file is found, the user is asked to select
which ones he wants to use.
The license's files are found by searching the package (case insensitive)
for files named license, license.txt etc. If more than one license file
is found, the user is asked to select which ones he wants to use.
"""
license_dict = {
'Apache Software License': 'Apache-2.0',
'BSD License': 'BSD',
'European Union Public Licence 1.0': 'EUPL-1.0',
'European Union Public Licence 1.1': 'EUPL-1.1',
"GNU General Public License": "GPL",
"GNU General Public License v2": "GPL-2.0",
"GNU General Public License v2 or later": "GPL-2.0+",
"GNU General Public License v3": "GPL-3.0",
"GNU General Public License v3 or later": "GPL-3.0+",
"GNU Lesser General Public License v2": "LGPL-2.1",
"GNU Lesser General Public License v2 or later": "LGPL-2.1+",
"GNU Lesser General Public License v3": "LGPL-3.0",
"GNU Lesser General Public License v3 or later": "LGPL-3.0+",
"GNU Library or Lesser General Public License": "LGPL-2.0",
"ISC License": "ISC",
"MIT License": "MIT",
"Mozilla Public License 1.0": "MPL-1.0",
"Mozilla Public License 1.1": "MPL-1.1",
"Mozilla Public License 2.0": "MPL-2.0",
"Zope Public License": "ZPL"
}
regexp = re.compile('^License :* *.* *:+ (.*)( \(.*\))?$')
classifiers_licenses = [regexp.sub(r"\1", lic)
for lic in self.metadata['info']['classifiers']
if regexp.match(lic)]
licenses = map(lambda x: license_dict[x] if x in license_dict else x,
classifiers_licenses)
lines = []
if not len(licenses):
print('WARNING: License has been set to "{license}". It is most'
' likely wrong, please change it if need be'.format(
license=', '.join(licenses)))
licenses = [self.metadata['info']['license']]
license_line = '{name}_LICENSE = {license}\n'.format(
name=self.mk_name,
license=', '.join(licenses))
lines.append(license_line)

filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
'COPYING', 'COPYING.TXT']
'COPYING', 'COPYING.TXT']
license_files = list(find_file_upper_case(filenames, self.tmp_extract))

lines.append(self.__get_license_names(license_files))

license_files = [license.replace(self.tmp_extract, '')[1:]
for license in license_files]
if len(license_files) > 0:
Expand Down

0 comments on commit d2ac1ec

Please sign in to comment.