From 1b6bc79b78f85ba1c3d17ac608c4801294c0afbc Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 28 Jul 2021 02:40:51 -0400 Subject: [PATCH 01/12] First attempt at Python script to clean up extensions. --- build/ci-cd/python/convert_filetypes.py | 198 ++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 build/ci-cd/python/convert_filetypes.py diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py new file mode 100644 index 0000000000..e3a7e6ee52 --- /dev/null +++ b/build/ci-cd/python/convert_filetypes.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 + +from argparse import ArgumentParser +import logging +from functools import reduce +import operator +import json +import os +from pprint import pprint +from ruamel.yaml import YAML +from lxml import etree +import sys + +def find(data={}, lookups=[], path=None): + path = path if path else [] + + # In case this is a list + if isinstance(data, list): + for index, value in enumerate(data): + new_path = list(path) + new_path.append(index) + for result in find(value, lookups, path=new_path): + yield result + + # In case this is a dictionary + if isinstance(data, dict): + for key, value in data.items(): + new_path = list(path) + new_path.append(key) + for result in find(value, lookups, path=new_path): + yield result + + if key == lookups[0]: + new_path = list(path) + matches = { l: data[l] for l in lookups if data.get(l) } + yield { 'path': new_path, **matches } + +def find_xml(data={}, namespaces={}, lookups=None): + for result in list(data.xpath(lookups, namespaces=namespaces)): + yield { + 'path': get_full_xpath(result), + **result.attrib + } + +def get_full_xpath(element=None): + if element.getparent() is None: return f"/{element.tag}" + return f"{get_full_xpath(element.getparent())}/{element.tag}" + +def replace(items=[{}], old='', new=''): + for i in items: + update = {} + + for k in i.keys(): + old_value = i[k] + update[k] = old_value.replace(old, 
new) if isinstance(old_value, str) else old_value + + if k != 'path' and update[k] != old_value: + update['original'] = {} if not update.get('original') else update.get('original') + update['original'][k] = old_value + + if update == i: + yield + + else: + yield update + +def pick(data={}, path=[]): + return reduce(operator.getitem, path, data) + +def pick_xml(data={}, namespaces={}, path=None): + results = data.xpath(path, namespaces=namespaces) + if len(results) > 0: return results[0] + +def update(data, updates=[{}], originals=[{}], compare_key=None): + maybe_dupes = [o.get(compare_key) for o in originals] + + for update in updates: + if update.get(compare_key) in maybe_dupes: + continue + + target = pick(data, update.get('path')) + + for k in update.keys(): + if target.get(k) and not k == 'path' and not k == 'original': + target[k] = update[k] + +def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=None): + maybe_dupes = [o.get(compare_key) for o in originals] + + for update in updates: + if update.get(compare_key) in maybe_dupes: + continue + + path = update.get('path').replace(f"{{{namespaces.get('xmlns')}}}",'oscal:') + path += f"[@{compare_key}=\"{update.get('original').get(compare_key)}\"]" + + target = pick_xml(data, namespaces, path) + + for k in update.keys(): + if not k == 'path' and not k == 'original': + target.attrib[k] = update[k] + +def process_json(file, old='', new='', dry_run=False): + try: + with open(file) as fd: + raw_data = fd.read() + data = json.loads(raw_data) + links = list(find(data, ['href', 'media-type'])) + replacements = list(r for r in replace(links, 'xml', 'json') if r) + update(data, replacements, links, 'href') + + new_file = dry_run_file(file) if dry_run else file + + with open(new_file, 'w') as fd: + json.dump(data, fd, indent=2) + + except Exception as err: + logging.exception(err) + +def process_xml(file, old='', new='', dry_run=False): + try: + with open(file) as fd: + namespaces = { + 'xmlns': 
'http://csrc.nist.gov/ns/oscal/1.0', + 'oscal': 'http://csrc.nist.gov/ns/oscal/1.0' + } + data = etree.parse(fd) + links = list(find_xml(data, namespaces, "//*[@href or @media-type]")) + replacements = list(r for r in replace(links, 'yaml', 'xml') if r) + update_xml(data, namespaces, replacements, links, 'href') + + new_file = dry_run_file(file) if dry_run else file + + with open(new_file, 'wb') as fd: + fd.write(etree.tostring(data, encoding='utf-8', xml_declaration=True, pretty_print=True)) + + except Exception as err: + logging.exception(err) + +def process_yaml(file, old='', new='', dry_run=False): + try: + with open(file) as fd: + raw_data = fd.read() + # Keep ordering and formatting to the best extent possible, with + # the round trip mode, rt. + yaml = YAML(typ='rt') + # Add back leading --- to output + yaml.explicit_start = True + # Indent sequences properly, to prevent this, no spaces before id. + # roles: + # - id: role-a + yaml.indent(mapping=2, sequence=4, offset=2) + # Do not rewrite " as ', keep double quotes. + yaml.preserve_quotes = True + # If you do not do this, datetimes are reformatted with microseconds. 
+ # See: https://stackoverflow.com/a/51002826 + yaml.constructor.yaml_constructors.pop(u'tag:yaml.org,2002:timestamp', None) + data = yaml.load(raw_data) + links = list(find(data, ['href', 'media-type'])) + replacements = list(r for r in replace(links, 'json', 'yaml') if r) + update(data, replacements, links, 'href') + + new_file = dry_run_file(file) if dry_run else file + + with open(new_file, 'w') as fd: + yaml.dump(data, fd) + + except Exception as err: + logging.exception(err) + +def dry_run_file(file): + file, file_ext = os.path.splitext(file) + return f"{file}_test.{file_ext}" + +def process(): + parser = ArgumentParser(description='Convert file extensions in fields with hyperlinks for OSCAL JSON, XML, and YAML document instances.') + parser.add_argument('--old-extension', '-o', dest='old', type=str, help='original file extension you want to convert from') + parser.add_argument('--new-extension', '-n', dest='new', type=str, help='target file extension you want to convert to') + parser.add_argument('-d', '--dry-run', action='store_true', help='Provide this argument to test by modifying an adjacent test file.') + parser.add_argument('file', type=str, help='path of original file') + + args = parser.parse_args() + + _, file_ext = os.path.splitext(args.file) + + if file_ext == '.json': + process_json(**vars(args)) + elif file_ext == '.xml': + process_xml(**vars(args)) + elif file_ext == '.yaml': + process_yaml(**vars(args)) + else: + logging.error(f"Cannot convert invalid OSCAL file with extension '{file_ext}'.") + + sys.exit(1) + +if __name__ == '__main__': + process() \ No newline at end of file From 2f1b1a8ae607a75a33f7bcbbf35fb767f25ed6fd Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 28 Jul 2021 02:43:12 -0400 Subject: [PATCH 02/12] Add Python deps manifest. 
--- build/ci-cd/python/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 build/ci-cd/python/requirements.txt diff --git a/build/ci-cd/python/requirements.txt b/build/ci-cd/python/requirements.txt new file mode 100644 index 0000000000..505a16d5a6 --- /dev/null +++ b/build/ci-cd/python/requirements.txt @@ -0,0 +1,3 @@ +lxml==4.6.3 +ruamel.yaml==0.17.10 +ruamel.yaml.clib==0.2.6 From 4dda8b6f2a5ac766274c81db34f514b696a8b287 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 28 Jul 2021 02:53:05 -0400 Subject: [PATCH 03/12] Update Dockerfile with Python dependencies. --- build/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build/Dockerfile b/build/Dockerfile index 213120138c..6765f620b2 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 ARG saxonversion ARG hugoversion +ADD ./ci-cd/python/requirements.txt . + ENV TZ=US/Eastern RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -10,7 +12,7 @@ RUN apt-get update && apt-get install -y wget apt-utils libxml2-utils jq maven n RUN npm install -g npm n RUN n latest RUN npm install --loglevel verbose -g ajv-cli@"^4.0.x" ajv-formats@"^1.5.x" json-diff markdown-link-check yaml-convert@"^1.0.x" yargs -RUN pip3 install lxml +RUN pip3 install -r requirements.txt #RUN useradd --create-home --home-dir /home/user user #USER user From cc26c8fcca60c462666a3cb3cd77bf8b66bc71f4 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 28 Jul 2021 03:31:37 -0400 Subject: [PATCH 04/12] Remove additive dot before file ext and try out adding to CI harness. 
--- build/ci-cd/copy-and-convert-content.sh | 8 +------- build/ci-cd/python/convert_filetypes.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/build/ci-cd/copy-and-convert-content.sh b/build/ci-cd/copy-and-convert-content.sh index aa4169076d..fe30cd4fea 100755 --- a/build/ci-cd/copy-and-convert-content.sh +++ b/build/ci-cd/copy-and-convert-content.sh @@ -154,13 +154,7 @@ post_process_content() { if [ "$VERBOSE" = "true" ]; then echo -e "${P_INFO}Translating relative XML paths to JSON paths in '${P_END}${target_file_relative}${P_INFO}'.${P_END}" fi - # Remove extra slashes - perl -pi -e 's,\\/,/,g' "${target_file}" - # translate OSCAL mime types - perl -pi -e 's,(application/(oscal\.)?[a-z]+\+)xml\",\1json\",g' "${target_file}" - # relative content paths - # translate path names for local references - perl -pi -e 's,((?:\.\./)+(?:(?!xml/)[^\s/"'']+/)+)xml/((?:(?!.xml)[^\s"'']+)+).xml,\1json/\2.json,g' "${target_file}" + python "$OSCALDIR/build/ci-cd/python/convert-filetypes.py" --old-extension xml --new-extension json "${target_file}" --dry-run fi # produce pretty JSON diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index e3a7e6ee52..2a5deef85a 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -170,7 +170,7 @@ def process_yaml(file, old='', new='', dry_run=False): def dry_run_file(file): file, file_ext = os.path.splitext(file) - return f"{file}_test.{file_ext}" + return f"{file}_test{file_ext}" def process(): parser = ArgumentParser(description='Convert file extensions in fields with hyperlinks for OSCAL JSON, XML, and YAML document instances.') From e2c3ff78759d5274de425ad7c4c367aa909cec5a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 12 Aug 2021 14:42:20 -0400 Subject: [PATCH 05/12] Add first attempt of Python converter. 
--- build/ci-cd/copy-and-convert-content.sh | 8 ++------ build/ci-cd/python/convert_filetypes.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/build/ci-cd/copy-and-convert-content.sh b/build/ci-cd/copy-and-convert-content.sh index fe30cd4fea..427f8d9d01 100755 --- a/build/ci-cd/copy-and-convert-content.sh +++ b/build/ci-cd/copy-and-convert-content.sh @@ -154,7 +154,7 @@ post_process_content() { if [ "$VERBOSE" = "true" ]; then echo -e "${P_INFO}Translating relative XML paths to JSON paths in '${P_END}${target_file_relative}${P_INFO}'.${P_END}" fi - python "$OSCALDIR/build/ci-cd/python/convert-filetypes.py" --old-extension xml --new-extension json "${target_file}" --dry-run + python "$OSCALDIR/build/ci-cd/python/convert-filetypes.py" --old-extension xml --new-extension json "${target_file}" fi # produce pretty JSON @@ -199,11 +199,7 @@ post_process_content() { if [ "$VERBOSE" = "true" ]; then echo -e "${P_INFO}Translating relative paths in '${P_END}${yaml_file_relative}${P_INFO}'.${P_END}" fi - # translate OSCAL mime types - perl -pi -e 's,(application/oscal\.[a-z]+\+)json\",\1yaml\",g' "${yaml_file}" - # translate path names for local references - perl -pi -e 's,((?:\.\./)+(?:(?!json/)[^\s/"'']+/)+)json/((?:(?!.json)[^\s"'']+)+).json,\1yaml/\2.yaml,g' "${yaml_file}" - + python "$OSCALDIR/build/ci-cd/python/convert-filetypes.py" --old-extension json --new-extension yaml "${yaml_file}" echo -e "${P_OK}Created YAML '${P_END}${yaml_file_relative}${P_OK}'.${P_END}" ;; xml) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 2a5deef85a..552ccdb1c2 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -191,8 +191,7 @@ def process(): process_yaml(**vars(args)) else: logging.error(f"Cannot convert invalid OSCAL file with extension '{file_ext}'.") - - sys.exit(1) + sys.exit(1) if __name__ == '__main__': process() \ No newline at end of file From 
4e72bafc1cd3b20954e07912e469053ba8f52693 Mon Sep 17 00:00:00 2001 From: Michaela Iorga Date: Thu, 16 Sep 2021 15:57:50 -0400 Subject: [PATCH 06/12] updated the tools table --- docs/content/tools/_index.md | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/content/tools/_index.md b/docs/content/tools/_index.md index ca34bf5c7f..615922e0d4 100644 --- a/docs/content/tools/_index.md +++ b/docs/content/tools/_index.md @@ -4,17 +4,21 @@ heading: OSCAL Tools menu: primary: name: Tools - weight: 30 + weight: 50 +toc: + enabled: true +toc: + enabled: true --- -The [OSCAL models](/documentation/schema/) provide standardized formats for exchanging control, control implementation, and control assessment information in XML, JSON, and YAML. These formats allow this information to be exchanged between tools and for individual tools to process exchanged data, supporting analytics, user interaction, and increased automation. +The [OSCAL models](/concepts/layer/) provide standardized formats for exchanging control, control implementation, and control assessment information in XML, JSON, and YAML. These formats allow this information to be exchanged between tools and for individual tools to process exchanged data, supporting analytics, user interaction, and increased automation. Tools exist that support the use of the OSCAL models. These tools are listed below in the following categories: - OSCAL Project provided tools and libraries - Open Source Tools provided by 3rd parties -If you have produced a tool that supports the OSCAL formats that you would like to have listed on this page, please [contact us](/contribute/contact/). +If you have produced a tool that supports the OSCAL formats that you would like to have listed on this page, please [contact us](/contact/). 
## Disclaimer @@ -22,12 +26,14 @@ If you have produced a tool that supports the OSCAL formats that you would like See the [NIST Software Disclaimer](https://www.nist.gov/disclaimer) for more information. -## OSCAL Project: Open Source Tools and Libraries - -- **[OSCAL Java Library:](https://github.com/usnistgov/liboscal-java)** Provides a Java-based programming API for reading and writing content conformant to the OSCAL XML, JSON, and YAML based models. This library is kept up-to-date with the [latest formats](/documentation/schema/) provided by the OSCAL project. -- **[XSLT Tooling:](https://github.com/usnistgov/oscal-tools/tree/master/xslt)** A variety of Extensible Stylesheet Language (XSL) Transformations (XSLT), Cascading Style Sheets (CSS), and related utilities for authoring, converting, and publishing OSCAL content in various forms. - -## Community: Open Source Tools and Libraries - -- **[OSCALkit:](https://github.com/docker/oscalkit)** Provides a GoLang SDK for OSCAL. Converts OSCAL XML -> JSON (and vice versa). Converts [OpenControl](https://open-control.org/) projects into OSCAL. -- **[OSCAL GUI:](https://github.com/brianrufgsa/OSCAL-GUI)** A proof of concept GUI tool for interacting with OSCAL content based on [OSCAL milestone 2](https://github.com/usnistgov/OSCAL/releases/tag/v1.0.0-milestone2). +## OSCAL Tools and Libraries + +| Name | Provider/Developer | Description | Type | +|:---|:---|:---|:---| +| [Compliance trestle](https://github.com/IBM/compliance-trestle) | IBM | A python SDK and command line tool which manipulates OSCAL structures and supports transformation of data into OSCAL. | open source | +| [OSCAL Java Library](https://github.com/usnistgov/liboscal-java) | NIST OSCAL Project | A Java-based programming API for reading and writing content conformant to the OSCAL XML, JSON, and YAML based models. 
| open source | +| [OSCAL React Component Library](https://github.com/EasyDynamics/oscal-react-library) | Easy Dynamics | A library of reusable React components and an [example user interface application](https://oscal-viewer.msd.easydynamics.com/catalog) that provides a direct UI into OSCAL. | open source | +| [OSCAL REST API](https://github.com/EasyDynamics/oscal-rest) | Easy Dynamics | An initial OpenAPI definition of an OSCAL REST API that describes how systems might manipulate catalogs, profiles, components, and SSPs. | open source | +| [XSLT Tooling](https://github.com/usnistgov/oscal-tools/tree/master/xslt) | NIST OSCAL Project | A variety of Extensible Stylesheet Language (XSL) Transformations (XSLT), Cascading Style Sheets (CSS), and related utilities for authoring, converting, and publishing OSCAL content in various forms. | open source | +| [XML Jelly Sandwich](https://github.com/wendellpiez/XMLjellysandwich) | Wendell Piez (NIST) | Interactive XSLT in the browser includes [OSCAL demonstrations](https://wendellpiez.github.io/XMLjellysandwich/oscal/). | open source | +| [Xacta 360](https://www.telos.com/offerings/xacta-360-continuous-compliance-assessment/) | Telos | Xacta 360 is a cyber risk management and compliance analytics platform that enables users to create and submit FedRAMP system security plans (SSPs) in OSCAL format. Future OSCAL capabilities are forthcoming as the standard evolves. | commercial | \ No newline at end of file From 244e89d52690585baae8b71acd22389e4c1190db Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 21 Sep 2021 20:52:59 -0400 Subject: [PATCH 07/12] Rename handler function accordingly, so docs make sense. 
--- build/ci-cd/python/convert_filetypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 552ccdb1c2..08758b9647 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -172,7 +172,7 @@ def dry_run_file(file): file, file_ext = os.path.splitext(file) return f"{file}_test{file_ext}" -def process(): +def handler((): parser = ArgumentParser(description='Convert file extensions in fields with hyperlinks for OSCAL JSON, XML, and YAML document instances.') parser.add_argument('--old-extension', '-o', dest='old', type=str, help='original file extension you want to convert from') parser.add_argument('--new-extension', '-n', dest='new', type=str, help='target file extension you want to convert to') @@ -194,4 +194,4 @@ def process(): sys.exit(1) if __name__ == '__main__': - process() \ No newline at end of file + handler(() \ No newline at end of file From 2bb327f6ed981dddd4c1ef00bb7f58d2242821c8 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 21 Sep 2021 23:53:52 -0400 Subject: [PATCH 08/12] Fix bug with hard-coded conversion for JSON processing. 
--- build/ci-cd/python/convert_filetypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 08758b9647..2889b93ee5 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -106,7 +106,7 @@ def process_json(file, old='', new='', dry_run=False): raw_data = fd.read() data = json.loads(raw_data) links = list(find(data, ['href', 'media-type'])) - replacements = list(r for r in replace(links, 'xml', 'json') if r) + replacements = list(r for r in replace(links, old, new) if r) update(data, replacements, links, 'href') new_file = dry_run_file(file) if dry_run else file From 2584ddc6d01a3d67112df604772ade13bb5384ca Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 22 Sep 2021 00:09:51 -0400 Subject: [PATCH 09/12] Add some more function documentation. --- build/ci-cd/python/convert_filetypes.py | 105 +++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 2889b93ee5..e699808dd7 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -85,6 +85,34 @@ def update(data, updates=[{}], originals=[{}], compare_key=None): target[k] = update[k] def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=None): + """Iterates through a list of potential updates matched from an OSCAL XML + document instance, checking for duplicates as defined by a compare key to + avoid modifications where post-update there would be duplicate adjacent + elements. + + :param data: the OSCAL XML document data to be modified, passed by reference + :type data: dict + + :param namespaces: a collection of additional XML namespaces to consider when + processing additional updates, not including `oscal` or `o`, the default XML + for NIST OSCAL development. 
+ :type namespaces: dict + + :param updates: a subset list of potential updates with confirmed matches for given + key-value pairs from originals, the complete list of matching keys (whether or not + the value is a match) from the target OSCAL XML document instance. + :type updates: list + + :param originals: the complete list of all fields that match a given key found by + searching a target OSCAL XML document instance. + :type originals: list + + :param compare_key: + :type compare_key: str + + :return: None + :rtype: None + """ maybe_dupes = [o.get(compare_key) for o in originals] for update in updates: @@ -101,6 +129,25 @@ def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=No target.attrib[k] = update[k] def process_json(file, old='', new='', dry_run=False): + """Analyze OSCAL JSON document instances and replace extensions for href + fields accordingly. + + :param file: the original target path and file + :type file: str + + :param old: the original file extension to match + :type old: str + + :param new: the replacement file extension to replace upon match + :type new: str + + :param dry_run: a setting, when True, that enables dry-run mode to prevent + editing of the original target file; defaults to False + :type dry_run: bool + + :return: an updated path and filename for writing dry run results + :rtype: str + """ try: with open(file) as fd: raw_data = fd.read() @@ -118,6 +165,26 @@ def process_json(file, old='', new='', dry_run=False): logging.exception(err) def process_xml(file, old='', new='', dry_run=False): + """Analyze OSCAL XML document instances and replace extensions for href + fields accordingly. 
+ + :param file: the original target path and file + :type file: str + + :param old: the original file extension to match + :type old: str + + :param new: the replacement file extension to replace upon match + :type new: str + + :param dry_run: a setting, when True, that enables dry-run mode to prevent + editing of the original target file; defaults to False + :type dry_run: bool + + :return: an updated path and filename for writing dry run results + :rtype: str + """ + try: with open(file) as fd: namespaces = { @@ -138,6 +205,25 @@ def process_xml(file, old='', new='', dry_run=False): logging.exception(err) def process_yaml(file, old='', new='', dry_run=False): + """Analyze OSCAL YAML document instances and replace extensions for href + fields accordingly. + + :param file: the original target path and file + :type file: str + + :param old: the original file extension to match + :type old: str + + :param new: the replacement file extension to replace upon match + :type new: str + + :param dry_run: a setting, when True, that enables dry-run mode to prevent + editing of the original target file; defaults to False + :type dry_run: bool + + :return: an updated path and filename for writing dry run results + :rtype: str + """ try: with open(file) as fd: raw_data = fd.read() @@ -169,10 +255,25 @@ def process_yaml(file, old='', new='', dry_run=False): logging.exception(err) def dry_run_file(file): + """Format a file name properly for dry-run mode, e.g. do not edit the original + target file and return a modified path to modify a temporary file. + + :param file: the original target path and file + :type file: str + + :return: an updated path and filename for writing dry run results + :rtype: str + """ file, file_ext = os.path.splitext(file) return f"{file}_test{file_ext}" -def handler((): +def handler(): + """Core function that encapsulates complete operational logic of the script, + as not to pollute the '__main__' scope. 
+ + :return: None + :rtype: None + """ parser = ArgumentParser(description='Convert file extensions in fields with hyperlinks for OSCAL JSON, XML, and YAML document instances.') parser.add_argument('--old-extension', '-o', dest='old', type=str, help='original file extension you want to convert from') parser.add_argument('--new-extension', '-n', dest='new', type=str, help='target file extension you want to convert to') @@ -194,4 +295,4 @@ def handler((): sys.exit(1) if __name__ == '__main__': - handler(() \ No newline at end of file + handler() \ No newline at end of file From 221eaf2d6ca0accd0fa63370aac89788b75e036f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 27 Sep 2021 23:50:29 -0400 Subject: [PATCH 10/12] Complete update of function docstrings. --- build/ci-cd/python/convert_filetypes.py | 140 +++++++++++++++++++++++- 1 file changed, 135 insertions(+), 5 deletions(-) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index e699808dd7..25783f5bec 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -12,6 +12,27 @@ import sys def find(data={}, lookups=[], path=None): + """Search data from an OSCAL JSON or YAML document instance in dictionary + form and perform lookups to find one or more keys for the names of OSCAL + fields or flags. 
+ + :param data: a parsed OSCAL JSON or YAML document instance in dictionary + form + :type data: dict + + :param lookups: a list of zero, one, or more key names when looking for key + value pairs to match + :type lookups: list[str] + + :param path: an optional path for the JSON or YAML location of the current + path, if not at the beginning of traversing the nested dictionaries, and + will update the path as it walks the recursive nested structure + :type path: list + + :return: an iterable sequence (from yield) with a dictionary of each match + found + :rtype: Iterable[dict] + """ path = path if path else [] # In case this is a list @@ -36,6 +57,23 @@ def find(data={}, lookups=[], path=None): yield { 'path': new_path, **matches } def find_xml(data={}, namespaces={}, lookups=None): + """Search data from an OSCAL XML document instance with a XPath query + to perform lookups to find one or more keys for the names of OSCAL + fields or flags. + + :param data: XML data to search + :type data: lxml.etree._ElementTree + + :param namespaces: an optional dict of XML namespaces to constrain your search + :type namespaces: dict + + :param lookups: a XPath query used to perform the search + :type lookups: str + + :return: an iterable sequence (from yield) with a dictionary of each match + found + :rtype: Iterable[dict] + """ for result in list(data.xpath(lookups, namespaces=namespaces)): yield { 'path': get_full_xpath(result), @@ -43,10 +81,36 @@ def find_xml(data={}, namespaces={}, lookups=None): } def get_full_xpath(element=None): + """Construct a XPath query with an absolute path to the instance of the + element passed in place of a relative one. 
+ + :param element: the target XML element + :type element: lxml.etree._Element + + :return: the absolute path to the element + :rtype: str + """ if element.getparent() is None: return f"/{element.tag}" return f"{get_full_xpath(element.getparent())}/{element.tag}" def replace(items=[{}], old='', new=''): + """Takes OSCAL JSON, XML, and YAML source data matches as key value pairs + in memory and makes changes in place, before persisting results to disk. + + This uses the python str.replace function, so substring or complete string + replacements are supported. If `old` does not match the complete original + string of an item in `items` it only replaces that substring. + + :param items: the list of matched items that will be changed and have values + replaced. + :type items: list[dict] + + :param old: the 'old' target value that is a candidate for replacement + :type old: str + + :param new: the 'new' value to replace + :type new: str + """ for i in items: update = {} @@ -65,13 +129,76 @@ def replace(items=[{}], old='', new=''): yield update def pick(data={}, path=[]): + """ + Convenience function to flatten nested collections of OSCAL data (from JSON + and YAML) data and pick on the relevant elements by their "path" identifier. + + :param data: the data from the document instance in dictionary form + :type data: dict + + :param path: a list of one or more key names to find, and if found, return key + value pairs while discarding those keys not in the path list. + :type path: list(str) + + :return: the flattened collection of only the relevant key-value pairs from the + data. + :rtype: collections.OrderedDict + """ return reduce(operator.getitem, path, data) def pick_xml(data={}, namespaces={}, path=None): + """ + Convenience function to use composable XPath queries to select specific + key-value data from OSCAL data sourced from OSCAL XML document instances. 
+ + :param data: the data from the document instance in dictionary form + :type data: dict + + :param namespaces: an optional allow list of XML namespaces to use exclusively + to query the data for results. + :type namespaces: dict + + :param path: a XPath query used to collect the key-value pairs. + :type path: str + + :return: the flattened collection of only the relevant key-value pairs from the + data. + :rtype: collections.OrderedDict + """ results = data.xpath(path, namespaces=namespaces) if len(results) > 0: return results[0] def update(data, updates=[{}], originals=[{}], compare_key=None): + """Iterates through a list of potential updates matched from an OSCAL JSON + or YAML document instance, checking for duplicates as defined by a compare + key to avoid modifications where post-update there would be duplicate adjacent + elements. + + :param data: the OSCAL XML document data to be modified, passed by reference to + modify the data in place + :type data: dict + + :param namespaces: a collection of additional XML namespaces to consider when + processing additional updates, not including `oscal` or `o`, the default XML + for NIST OSCAL development. + :type namespaces: dict + + :param updates: a subset list of potential updates with confirmed matches for given + key-value pairs from originals, the complete list of matching keys (whether or not + the value is a match) from the target OSCAL XML document instance. + :type updates: list + + :param originals: the complete list of all fields that match a given key found by + searching a target OSCAL XML document instance. 
+ :type originals: list + + :param compare_key: the key of a key-value pair used to test for equivalence where + two items are considered equal iff this key in both objects has the same value + :type compare_key: str + + :return: None + :rtype: None + """ maybe_dupes = [o.get(compare_key) for o in originals] for update in updates: @@ -107,7 +234,8 @@ def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=No searching a target OSCAL XML document instance. :type originals: list - :param compare_key: + :param compare_key: the key of a key-value pair used to test for equivalence where + two items are considered equal iff this key in both objects has the same value :type compare_key: str :return: None @@ -129,8 +257,9 @@ def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=No target.attrib[k] = update[k] def process_json(file, old='', new='', dry_run=False): - """Analyze OSCAL JSON document instances and replace extensions for href - fields accordingly. + """Analyze OSCAL JSON document instances and replace extensions for the + specified fields, replace fields, and modify results in the target file + or optionally a separate file for testing purposes. :param file: the original target path and file :type file: str @@ -165,8 +294,9 @@ def process_json(file, old='', new='', dry_run=False): logging.exception(err) def process_xml(file, old='', new='', dry_run=False): - """Analyze OSCAL XML document instances and replace extensions for href - fields accordingly. + """Analyze OSCAL XML document instances and replace extensions for the + specified fields, replace fields, and modify results in the target file + or optionally a separate file for testing purposes. :param file: the original target path and file :type file: str From 596c46a12e5ddbbd650d56052913aa11d4d36fcf Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 28 Sep 2021 00:16:30 -0400 Subject: [PATCH 11/12] A little more function documentation. 
--- build/ci-cd/python/convert_filetypes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 25783f5bec..54d366a58c 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -112,20 +112,32 @@ def replace(items=[{}], old='', new=''): :type new: str """ for i in items: + # Iterate through each item dict i in list of potential replacement points. + # Initialize empty update object. update = {} for k in i.keys(): + # Iterate through each key-value pair in dict i (one item `i` from the `items` list) old_value = i[k] update[k] = old_value.replace(old, new) if isinstance(old_value, str) else old_value + # ignore `path` as this key is added by this script for another pre-processing step + # to encode an array to walk the JSON/YAML path or a full XPath query for XML source + # data; we do not want to process this path key-value, it is internal metadata. if k != 'path' and update[k] != old_value: + # Changed values are recorded in the `update` nested dict `original`, so + # initialize it if no earlier key has created it yet. update['original'] = {} if not update.get('original') else update.get('original') + # Now the new key-value can be added while preserving previous additions. update['original'][k] = old_value if update == i: + # if the update is equivalent to the `i` item, nothing changed, so yield None yield else: + # the `i` item is different, and has an `original` key recording which values changed + # for future processing, yield the discrete update yield update def pick(data={}, path=[]): From 32fdc0cba9ed661fa518dd3ad0e53559dc7a9226 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 28 Sep 2021 00:43:06 -0400 Subject: [PATCH 12/12] Final function signature touch-ups. 
--- build/ci-cd/python/convert_filetypes.py | 34 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/build/ci-cd/python/convert_filetypes.py b/build/ci-cd/python/convert_filetypes.py index 54d366a58c..6fdf58388d 100644 --- a/build/ci-cd/python/convert_filetypes.py +++ b/build/ci-cd/python/convert_filetypes.py @@ -11,7 +11,7 @@ from lxml import etree import sys -def find(data={}, lookups=[], path=None): +def find(data, lookups=None, path=None): """Search data from an OSCAL JSON or YAML document instance in dictionary form and perform lookups to find one or more keys for the names of OSCAL fields or flags. @@ -33,6 +33,7 @@ def find(data={}, lookups=[], path=None): found :rtype: Iterable[dict] """ + lookups = lookups if lookups else [] path = path if path else [] # In case this is a list @@ -56,7 +57,7 @@ def find(data={}, lookups=[], path=None): matches = { l: data[l] for l in lookups if data.get(l) } yield { 'path': new_path, **matches } -def find_xml(data={}, namespaces={}, lookups=None): +def find_xml(data, namespaces=None, lookups=None): """Search data from an OSCAL XML document instance with a XPath query to perform lookups to find one or more keys for the names of OSCAL fields or flags. @@ -74,6 +75,9 @@ def find_xml(data={}, namespaces={}, lookups=None): found :rtype: Iterable[dict] """ + namespaces = namespaces if namespaces else {} + lookups = lookups if lookups else '' + for result in list(data.xpath(lookups, namespaces=namespaces)): yield { 'path': get_full_xpath(result), @@ -93,7 +97,7 @@ def get_full_xpath(element=None): if element.getparent() is None: return f"/{element.tag}" return f"{get_full_xpath(element.getparent())}/{element.tag}" -def replace(items=[{}], old='', new=''): +def replace(items, old='', new=''): """Takes OSCAL JSON, XML, and YAML source data matches as key value pairs in memory and makes changes in place, before persisting results to disk. 
@@ -111,6 +115,8 @@ def replace(items=[{}], old='', new=''): :param new: the 'new' value to replace :type new: str """ + items = items if items else [{}] + for i in items: # Iterate through each item dict i in list of potential replacement points. # Initialize empty update object. @@ -140,7 +146,7 @@ def replace(items=[{}], old='', new=''): # for future processing, yield the discrete update yield update -def pick(data={}, path=[]): +def pick(data, path=None): """ Convenience function to flatten nested collections of OSCAL data (from JSON and YAML) data and pick on the relevant elements by their "path" identifier. @@ -156,9 +162,10 @@ def pick(data={}, path=[]): data. :rtype: collections.OrderedDict """ + path = path if path else [] return reduce(operator.getitem, path, data) -def pick_xml(data={}, namespaces={}, path=None): +def pick_xml(data, namespaces=None, path=None): """ Convenience function to use composable XPath queries to select specific key-value data from OSCAL data sourced from OSCAL XML document instances. @@ -177,10 +184,16 @@ def pick_xml(data={}, namespaces={}, path=None): data. 
:rtype: collections.OrderedDict """ + namespaces = namespaces if namespaces else {} + path = path if path else '' + results = data.xpath(path, namespaces=namespaces) - if len(results) > 0: return results[0] + if len(results) > 0: + return results[0] + else: + return None -def update(data, updates=[{}], originals=[{}], compare_key=None): +def update(data, updates=None, originals=None, compare_key=None): """Iterates through a list of potential updates matched from an OSCAL JSON or YAML document instance, checking for duplicates as defined by a compare key to avoid modifications where post-update there would be duplicate adjacent @@ -211,6 +224,8 @@ def update(data, updates=[{}], originals=[{}], compare_key=None): :return: None :rtype: None """ + updates = updates if updates else [{}] + originals = originals if originals else [{}] maybe_dupes = [o.get(compare_key) for o in originals] for update in updates: @@ -223,7 +238,7 @@ def update(data, updates=[{}], originals=[{}], compare_key=None): if target.get(k) and not k == 'path' and not k == 'original': target[k] = update[k] -def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=None): +def update_xml(data, namespaces=None, updates=None, originals=None, compare_key=None): """Iterates through a list of potential updates matched from an OSCAL XML document instance, checking for duplicates as defined by a compare key to avoid modifications where post-update there would be duplicate adjacent @@ -253,6 +268,9 @@ def update_xml(data, namespaces={}, updates=[{}], originals=[{}], compare_key=No :return: None :rtype: None """ + namespaces = namespaces if namespaces else {} + updates = updates if updates else [{}] + originals = originals if originals else [{}] maybe_dupes = [o.get(compare_key) for o in originals] for update in updates: