From 46d652750f8980b8498ea62c0a8de022a6193fdc Mon Sep 17 00:00:00 2001 From: Nikita Wootten Date: Fri, 25 Aug 2023 12:20:04 -0400 Subject: [PATCH] Produce Metaschemas without XXEs (#1665) (#1901) * Produce Metaschemas without XXEs (#1665) * ADR for XXE resolution * Accepted ADR 5 * Apply suggestions from code review Co-authored-by: A.J. Stein * Revert release artifact archive readme extension change --------- Co-authored-by: A.J. Stein --- build/Makefile | 56 +++++++++++++++-- build/README.md | 3 +- build/resolve-entities.xsl | 60 +++++++++++++++++++ decisions/0005-repository-reorganization.md | 2 +- decisions/0006-source-metaschema-xxes.md | 32 ++++++++++ .../{README.txt => release-readme.txt} | 6 +- 6 files changed, 148 insertions(+), 11 deletions(-) create mode 100644 build/resolve-entities.xsl create mode 100644 decisions/0006-source-metaschema-xxes.md rename src/release/{README.txt => release-readme.txt} (86%) diff --git a/build/Makefile b/build/Makefile index fa65d24b15..8d43a71f6c 100644 --- a/build/Makefile +++ b/build/Makefile @@ -14,7 +14,7 @@ help: ## Show this help message all: artifacts checks archives ## Run all pipelines .PHONY: artifacts -artifacts: schemas converters ## Generate all artifacts +artifacts: schemas converters resolved-metaschemas ## Generate all artifacts .PHONY: checks checks: linkcheck validate test-profile-resolution ## Run all tests and checks @@ -26,11 +26,13 @@ node_modules: package.json package-lock.json npm ci .PHONY: clean -clean: clean-schemas clean-linkcheck clean-converters clean-archives ## Remove all generated content +clean: clean-schemas clean-linkcheck clean-converters clean-archives clean-resolved-metaschemas ## Remove all generated content METASCHEMA_XSLT_COMMAND:=metaschema-xslt/bin/metaschema-xslt SRC_DIR:=../src/metaschema -# Contains the metaschemas that do not contain "common" or "metadata" +# Contains all OSCAL metaschema modules, including those without exported roots +ALL_METASCHEMAS:=$(shell find $(SRC_DIR) 
-name '*_metaschema.xml') +# Contains the OSCAL metaschema modules that contain root assemblies METASCHEMAS:=$(shell find $(SRC_DIR) -name '*_metaschema.xml' -a ! -name '*common*' -a ! -name '*metadata*') GENERATED_DIR:=generated @@ -71,6 +73,33 @@ clean-converters: ## Remove generated converters rm -fr $(GENERATED_DIR)/*_xml-to-json-converter.xsl rm -fr $(GENERATED_DIR)/*_json-to-xml-converter.xsl +################################### +# Resolved Metaschemas Generation # +################################### + +POM_PATH:=./pom.xml +define EXEC_SAXON + mvn --quiet -f "$(POM_PATH)" exec:java \ + -Dexec.mainClass="net.sf.saxon.Transform" \ + -Dexec.args="$1" +endef + +RESOLVER_STYLESHEET:=./resolve-entities.xsl + +RESOLVED_METASCHEMA_SUFFIX:=RESOLVED +RESOLVED_METASCHEMAS:=$(patsubst $(SRC_DIR)/%_metaschema.xml,$(GENERATED_DIR)/%_metaschema_$(RESOLVED_METASCHEMA_SUFFIX).xml,$(ALL_METASCHEMAS)) + +$(GENERATED_DIR)/%_metaschema_$(RESOLVED_METASCHEMA_SUFFIX).xml: $(SRC_DIR)/%_metaschema.xml + @mkdir -p $(GENERATED_DIR) + $(call EXEC_SAXON,-s:$(SRC_DIR)/$*_metaschema.xml -xsl:$(RESOLVER_STYLESHEET) -o:$@ importHrefSuffix=$(RESOLVED_METASCHEMA_SUFFIX)) + +.PHONY: resolved-metaschemas +resolved-metaschemas: $(RESOLVED_METASCHEMAS) ## Generate the resolved metaschema modules + +.PHONY: clean-resolved-metaschemas +clean-resolved-metaschemas: ## Remove generated resolved metaschema modules + rm -f $(RESOLVED_METASCHEMAS) + ###################### # Archive Generation # ###################### @@ -83,14 +112,15 @@ ARCHIVE_TEMP_DIR:=$(GENERATED_DIR)/archive_temp .PHONY: archives archives: $(ZIP_ARCHIVE) ## Archive converters and schemas -$(ZIP_ARCHIVE) $(TARBZ2_ARCHIVE): converters schemas +$(ZIP_ARCHIVE) $(TARBZ2_ARCHIVE): converters schemas resolved-metaschemas @echo Generating archive - mkdir -p $(ARCHIVE_TEMP_DIR)/{json,xml}/{convert,schema} - cp ../src/release/README.txt "$(ARCHIVE_TEMP_DIR)/README.md" + mkdir -p $(ARCHIVE_TEMP_DIR)/{{json,xml}/{convert,schema},metaschema} + cp 
../src/release/release-readme.txt "$(ARCHIVE_TEMP_DIR)/README.txt" cp $(XSD_OUTPUTS) "$(ARCHIVE_TEMP_DIR)/xml/schema" cp $(JSONSCHEMA_OUTPUTS) "$(ARCHIVE_TEMP_DIR)/json/schema" cp $(XML2JSON_CONVERTERS) "$(ARCHIVE_TEMP_DIR)/xml/convert" cp $(JSON2XML_CONVERTERS) "$(ARCHIVE_TEMP_DIR)/json/convert" + cp $(RESOLVED_METASCHEMAS) "$(ARCHIVE_TEMP_DIR)/metaschema" (cd "$(ARCHIVE_TEMP_DIR)" && zip -r $(abspath $(ZIP_ARCHIVE)) .) tar -jcvf "$(TARBZ2_ARCHIVE)" -C "$(ARCHIVE_TEMP_DIR)" . @@ -161,3 +191,17 @@ validate-composition-%: .PHONY: test-profile-resolution test-profile-resolution: ## Unit test the profile resolver $(MAKE) -C ../src/utils/resolver-pipeline test + +################### +# Utility Targets # +################### + +# These targets may be used by consumers of the OSCAL repository + +# All artifacts typically included in a release +RELEASE_ARTIFACTS:=$(XSD_OUTPUTS) $(JSONSCHEMA_OUTPUTS) $(XML2JSON_CONVERTERS) $(JSON2XML_CONVERTERS) $(RESOLVED_METASCHEMAS) $(ZIP_ARCHIVE) $(TARBZ2_ARCHIVE) + +# This target is used by OSCAL-Reference to generate meta-redirects for release assets +.PHONY: list-release-artifacts +list-release-artifacts: ## Print out a list of all artifacts typically included in an OSCAL release + @echo $(RELEASE_ARTIFACTS) diff --git a/build/README.md b/build/README.md index d1606c2694..cfcf561cd6 100644 --- a/build/README.md +++ b/build/README.md @@ -34,7 +34,8 @@ Developers can generate schemas locally using the `make artifacts` command. Developers can also generate individual artifacts using the following commands: * `make schemas`: Generates the JSON Schemas and XSDs off of the source Metaschemas; -* `make converters`: Generates the XSLT stylesheets for JSON<->XML conversion off of the source Metaschemas. 
+* `make converters`: Generates the XSLT stylesheets for JSON<->XML conversion off of the source Metaschemas; +* `make resolved-metaschemas`: Resolves external entities (XXE) present in the source OSCAL Metaschema modules for use with tools that do not support XXEs. ### Checks diff --git a/build/resolve-entities.xsl b/build/resolve-entities.xsl new file mode 100644 index 0000000000..9f8971f88c --- /dev/null +++ b/build/resolve-entities.xsl @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/decisions/0005-repository-reorganization.md b/decisions/0005-repository-reorganization.md index 35e262d957..1599cb391a 100644 --- a/decisions/0005-repository-reorganization.md +++ b/decisions/0005-repository-reorganization.md @@ -5,7 +5,7 @@ Date: 2023/07/06 ## Status -Proposed +Approved ## Context diff --git a/decisions/0006-source-metaschema-xxes.md b/decisions/0006-source-metaschema-xxes.md new file mode 100644 index 0000000000..5c979b5342 --- /dev/null +++ b/decisions/0006-source-metaschema-xxes.md @@ -0,0 +1,32 @@ +# Production of transformed source Metaschema modules without external entities (XXEs) + +Date: 2023/08/25 + +## Status + +Approved + +## Context + +We wish to remove or mitigate points of friction encountered by NIST or community OSCAL developers producing tooling that consumes the OSCAL Metaschema module sources. + +The OSCAL Metaschema modules currently use external entities to prevent duplication of constraint data. +These external entities are important for modeling ergonomics and cannot be removed until a Metaschema-native approach is stabilized; however, external entities have a storied history of abuse. +So-called "XML External Entity (XXE) Attacks", along with the additional complexity needed to support external entity resolution, have led to a situation where many XML parsers do not ship with XXE functionality. 
+This has put additional burden on OSCAL tool developers seeking to consume the source Metaschemas, who have either had to choose from the small subset of XML parsers that support external entities (if one exists for their target language at all) and inherit all additional risks that come with XXEs, or perform transformation of the source Metaschema modules before consuming them. + +- Related to Issue [#1665](https://github.com/usnistgov/OSCAL/issues/1665) + +## Decision + +The NIST OSCAL Team should include the "resolved" Metaschema module sources as an artifact generated upon release. +Additionally, the NIST OSCAL Team should document the process for obtaining a resolved Metaschema module as part of the [streamlined build process](./0005-repository-reorganization.md#streamlined-build-process). + +In the event that Metaschema stabilizes constraint imports, the NIST OSCAL Team will review this ADR and potentially determine a deprecation strategy for these generated artifacts in the relevant releases. + +## Consequences + +This decision will not have any breaking changes to our process, however: + +1. The NIST team will be responsible for reviewing the additional artifacts before performing a release. +2. In the event that Metaschema stabilizes constraint imports, the NIST OSCAL team will have to maintain deprecated artifacts until the next major version. diff --git a/src/release/README.txt b/src/release/release-readme.txt similarity index 86% rename from src/release/README.txt rename to src/release/release-readme.txt index 8a69f7add8..10439713fb 100644 --- a/src/release/README.txt +++ b/src/release/release-readme.txt @@ -9,8 +9,8 @@ Documentation for the OSCAL models can be found at: https://pages.nist.gov/OSCAL This release provides 2 types of resources, each located in a different subdirectory: -- xml: Provides the XML schemas and content converters that are needed to support the OSCAL model XML-based formats. 
Instructions for using this information can be found at: https://github.com/usnistgov/OSCAL/tree/master/xml. -- json: Provides the JSON schemas and content converters that are needed to support the OSCAL model JSON-based formats. Instructions for using this information can be found at: https://github.com/usnistgov/OSCAL/tree/master/json. +- `xml/` and `json/`: Provide the XML and JSON schemas and content converters that are needed to support the OSCAL model. Instructions for using these artifacts can be found at https://github.com/usnistgov/OSCAL/blob/develop/build/README.md#artifact-usage +- `metaschema/`: Provides the source OSCAL Metaschema modules with all external entities (XXE) resolved for tools that do not support XXEs. These directories provide stable, released versions of the resources provided on the OSCAL GitHub repository: https://github.com/usnistgov/OSCAL. @@ -32,6 +32,6 @@ OSCAL is being developed in a public GitHub repository, in collaboration with in - Help with developing OSCAL models and associated content. - Assistance with developing documentation, tutorials, and other informational resources. -If you are interested in helping, please visit or contributing page for more information at: https://github.com/usnistgov/OSCAL/blob/master/CONTRIBUTING.md. +If you are interested in helping, please visit our contributing page for more information at: https://github.com/usnistgov/OSCAL/blob/main/CONTRIBUTING.md. Please direct any questions, comments, concerns, or kudos by email to: oscal@nist.gov.