1
0
mirror of https://github.com/ansible/mazer.git synced 2026-02-05 12:45:17 +01:00

Load spdx data from json on first use only

Generate a short, concise ansible_galaxy/data/spdx_licenses.json
from the verbose upstream data.

ansible_galaxy.data.spdx_licenses is a module that caches the
data loaded from json on first use of get_spdx().

- Add tool (spdx_update.py) to build a condensed json file
with data from spdx.json

Use this to parse the spdx json data whenever it is updated
and to generate a new source file.

- Add a 'dev/spdx-update' make target

This target pulls down the latest SPDX license
info from https://github.com/spdx/license-list-data
to data/spdx_licenses.json

Then use spdx_update.py to generate the condensed
ansible_galaxy/data/spdx_licenses.json based
on data/spdx_licenses.json

- Validate CollectionInfo off static dict of spdx info.

Previously, every time a CollectionInfo() was created, the
spdx_data was being loaded and parsed.

Now check it against a set of data in ansible_galaxy.data.spdx_licenses

If a license id is in the SPDX_LICENSES dict, it is acceptable.
If the license item's 'deprecated' value is True, then the license is
deprecated and a warning is printed.

TODO: mv the deprecated license check so it only runs on 'build'
and not anytime a collection is loaded
This commit is contained in:
Adrian Likins
2018-11-09 13:41:31 -05:00
parent 037012ae2e
commit f4390adace
8 changed files with 5990 additions and 4741 deletions

View File

@@ -64,5 +64,9 @@ dev/dist: clean ## builds source and wheel package
python setup.py bdist_wheel
ls -l dist
# Refresh the SPDX license data: download the upstream license list to
# data/spdx_licenses.json, then condense it with spdx_update.py into
# ansible_galaxy/data/spdx_licenses.json.
# curl runs with --fail so an HTTP error aborts the target instead of saving
# the server's error page as license data.
dev/spdx-update: ## update the generated ansible_galaxy/data/spdx_licenses.json
	curl --fail https://raw.githubusercontent.com/spdx/license-list-data/master/json/licenses.json --output data/spdx_licenses.json
	./spdx_update.py data/spdx_licenses.json > ansible_galaxy/data/spdx_licenses.json ## generate a subset of spdx license info
install: clean ## install the package to the active Python's site-packages
python setup.py install

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
import json
import logging
import os
log = logging.getLogger(__name__)
# Module-level cache for the SPDX license data. Always read it through
# get_spdx() so that the data is not re-read from disk once it has been loaded.
_SPDX_LICENSES = None
def load_spdx():
    """Read the condensed SPDX license data from spdx_licenses.json.

    The file is looked up in the 'data' directory under the parent of the
    directory that contains this module.

    Returns:
        The object parsed from the json file, or an empty dict when the
        file cannot be opened or read (the failure is logged).
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    license_path = os.path.join(module_dir, '..', 'data', 'spdx_licenses.json')

    try:
        with open(license_path, 'r') as license_file:
            spdx_data = json.load(license_file)
    except EnvironmentError as exc:
        # Best effort: report the problem and fall back to "no known licenses".
        log.warning('Unable to open %s to load the list of acceptable open source licenses: %s',
                    license_path, exc)
        log.exception(exc)
        spdx_data = {}

    return spdx_data
def get_spdx():
    """Return the SPDX license data, loading it from disk on first use.

    The result of load_spdx() is cached in the module-level _SPDX_LICENSES,
    including the empty dict it returns when the file cannot be read, so the
    json file is read (and a load failure is logged) at most once.

    Returns:
        The cached object produced by load_spdx().
    """
    global _SPDX_LICENSES
    # Compare against None rather than truthiness: an empty result is still a
    # completed load and must not trigger another disk read on every call.
    if _SPDX_LICENSES is None:
        _SPDX_LICENSES = load_spdx()
    return _SPDX_LICENSES

View File

@@ -1,12 +1,12 @@
from __future__ import print_function
import attr
import json
import logging
import os
import re
import semver
from ansible_galaxy.data import spdx_licenses
log = logging.getLogger(__name__)
TAG_REGEXP = re.compile('^[a-z0-9]+$')
@@ -57,17 +57,18 @@ class CollectionInfo(object):
@license.validator
def _check_license(self, attribute, value):
# Validator for the 'license' attribute: 'value' is checked against the known
# SPDX license ids.
# NOTE(review): this is a diff hunk rendered without +/- markers, so two
# implementations appear back to back. Judging by the commit message, this
# first one (it re-reads spdx_licenses.json on every call) is presumably the
# removed code - confirm against the actual diff.
cwd = os.path.dirname(os.path.abspath(__file__))
license_path = os.path.join(cwd, '..', 'data', 'spdx_licenses.json')
license_data = json.load(open(license_path, 'r'))
for lic in license_data['licenses']:
if lic['licenseId'] == value:
if lic['isDeprecatedLicenseId']:
print("Warning: collection metadata 'license' value '%s' is "
"deprecated." % value)
return True
self.value_error("Expecting 'license' to be a valid SPDX license ID, instead found '%s'. "
"For more info, visit https://spdx.org" % value)
# NOTE(review): presumably the added implementation starts here - confirm.
# It looks 'value' up in the mapping returned by spdx_licenses.get_spdx().
# load or return already loaded data
licenses = spdx_licenses.get_spdx()
valid = licenses.get(value, None)
# an id that is missing from the mapping is reported through value_error()
if valid is None:
self.value_error("Expecting 'license' to be a valid SPDX license ID, instead found '%s'. "
"For more info, visit https://spdx.org" % value)
# license was in list, but is deprecated
if valid and valid.get('deprecated', None):
print("Warning: collection metadata 'license' value '%s' is "
"deprecated." % value)
@authors.validator
@keywords.validator

4730
data/spdx_licenses.json Normal file

File diff suppressed because it is too large Load Diff

55
spdx_update.py Executable file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/python
import argparse
import json
import logging
import sys
log = logging.getLogger(__name__)
def load_spdx(file_object):
    """Parse SPDX license json from an open file object.

    Args:
        file_object: A readable file-like object containing json.

    Returns:
        The deserialized json data.
    """
    return json.load(file_object)
def build_short_list(license_data):
    """Condense the full SPDX license data to a per-id deprecation map.

    Args:
        license_data: Parsed SPDX json whose 'licenses' entry is an iterable
            of dicts that each carry 'licenseId' and 'isDeprecatedLicenseId'.

    Returns:
        A dict mapping each license id to {'deprecated': <flag>}.
    """
    return {
        entry['licenseId']: {'deprecated': entry['isDeprecatedLicenseId']}
        for entry in license_data['licenses']
    }
def json_dumps_license_data(short_license_data):
    """Serialize the condensed license data to a json string.

    The output is indented by 4, has sorted keys and uses ',' and ':' as
    separators.

    Args:
        short_license_data: The json-serializable data to dump.

    Returns:
        The json text as a string.
    """
    dump_options = {
        'indent': 4,
        'sort_keys': True,
        'separators': (',', ':'),
    }
    serialized = json.dumps(short_license_data, **dump_options)
    log.debug('dict_buf: %s', serialized)
    return serialized
def main(argv):
    """Print the condensed form of an SPDX license json file to stdout.

    Args:
        argv: Command line arguments without the program name; one
            positional argument naming the SPDX license json input file.

    Returns:
        0, intended as the process exit status.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='The SPDX license json file input',
                        type=argparse.FileType())
    args = parser.parse_args(argv)
    log.debug('args: %s', args)

    # argparse.FileType opens the file while parsing and never closes it, so
    # release the handle here once it has been read. An input of '-' maps to
    # sys.stdin, which is left open.
    try:
        data = load_spdx(args.input)
    finally:
        if args.input is not sys.stdin:
            args.input.close()

    short_data = build_short_list(data)
    buf = json_dumps_license_data(short_data)
    print(buf)
    return 0
# Run as a script: hand the command line arguments (minus the program name)
# to main() and exit with the status it returns.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

View File

@@ -22,6 +22,19 @@ def col_info():
return test_data
def test_license_deprecated(col_info):
    """A CollectionInfo can still be created with the 'AGPL-1.0' license id."""
    license_id = 'AGPL-1.0'
    col_info['license'] = license_id

    collection = CollectionInfo(**col_info)

    # The only other observable behavior is a print() side effect, so just
    # check that the license value was kept as given.
    assert collection.license == license_id
def test_license_unknown(col_info):
    """Creating a CollectionInfo with an unknown license id raises ValueError."""
    col_info.update(license='SOME-UNKNOWN')
    expected_message = ".*license.*SOME-UNKNOWN.*"

    with pytest.raises(ValueError, match=expected_message):
        CollectionInfo(**col_info)
def test_license_error(col_info):
col_info['license'] = 'GPLv2'