Skip to content

Commit

Permalink
Tools: Snippet extractor and validator
Browse files Browse the repository at this point in the history
Rework error collection

Scan for snippet errors during validation

tmp
  • Loading branch information
DavidSouther committed Jan 9, 2024
1 parent ef13e9e commit b149462
Show file tree
Hide file tree
Showing 12 changed files with 375 additions and 208 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ venv
xcuserdata

# Ignore rust_dev_preview as it's no longer part of the project
rust_dev_preview
rust_dev_preview
# .snippets are created temporarily as build artifacts
.snippets
11 changes: 4 additions & 7 deletions .tools/validation/doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DocGen:
snippets: dict[str, Snippet] = field(default_factory=dict)

@staticmethod
def from_root(root: Path) -> Self | MetadataErrors:
def from_root(root: Path) -> (Self, MetadataErrors):
errors = MetadataErrors()

with open(root / "sdks.yaml", encoding="utf-8") as file:
Expand All @@ -30,12 +30,9 @@ def from_root(root: Path) -> Self | MetadataErrors:

with open(root / "services.yaml", encoding="utf-8") as file:
meta = yaml.safe_load(file)
parsed = parse_services("services.yaml", meta)
services = errors.maybe_extend(parsed)
services, service_errors = parse_services("services.yaml", meta)
errors.extend(service_errors)

snippets = {}

if len(errors) > 0:
return errors

return DocGen(sdks=sdks, services=services, snippets=snippets)
return DocGen(sdks=sdks, services=services, snippets=snippets), errors
98 changes: 46 additions & 52 deletions .tools/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class Version:
more_info: list[Url] = field(default_factory=list)

@staticmethod
def from_yaml(yaml: dict[str, any], doc_gen: DocGen) -> Self | MetadataParseError:
def from_yaml(yaml: dict[str, any], doc_gen: DocGen) -> (Self, MetadataParseError):
errors = MetadataErrors()

sdk_version = int(yaml.get("sdk_version", 0))
Expand Down Expand Up @@ -109,17 +109,17 @@ def from_yaml(yaml: dict[str, any], doc_gen: DocGen) -> Self | MetadataParseErro
if add_services and block_content is not None:
errors.append(metadata_errors.APIExampleCannotAddService())

if len(errors) > 0:
return errors

return Version(
sdk_version,
block_content,
excerpts,
github,
add_services,
sdkguide,
more_info,
return (
Version(
sdk_version,
block_content,
excerpts,
github,
add_services,
sdkguide,
more_info,
),
errors,
)


Expand All @@ -141,18 +141,14 @@ def from_yaml(name: str, yaml: any, doc_gen: DocGen) -> Self | MetadataErrors:

versions: list[Version] = []
for version in yaml_versions:
version = Version.from_yaml(version, doc_gen)
if isinstance(version, Version):
versions.append(version)
else:
for error in version:
error.language = name
errors.append(error)
version, version_errors = Version.from_yaml(version, doc_gen)
errors.extend(version_errors)
versions.append(version)

if len(errors) > 0:
return errors
for error in errors:
error.language = name

return Language(name, versions)
return Language(name, versions), errors


@dataclass
Expand All @@ -178,7 +174,7 @@ class Example:
source_key: Optional[str] = field(default=None)

@staticmethod
def from_yaml(yaml: any, doc_gen: DocGen) -> Self | MetadataErrors:
def from_yaml(yaml: any, doc_gen: DocGen) -> (Self, MetadataErrors):
errors = MetadataErrors()

title = get_with_valid_entities("title", yaml, errors)
Expand Down Expand Up @@ -214,22 +210,22 @@ def from_yaml(yaml: any, doc_gen: DocGen) -> Self | MetadataErrors:
except DuplicateItemException:
pass

if len(errors) > 0:
return errors

return Example(
id="",
file="",
title=title,
title_abbrev=title_abbrev,
category=category,
guide_topic=guide_topic,
languages=languages,
service_main=service_main,
services=services,
synopsis=synopsis,
synopsis_list=synopsis_list,
source_key=source_key,
return (
Example(
id="",
file="",
title=title,
title_abbrev=title_abbrev,
category=category,
guide_topic=guide_topic,
languages=languages,
service_main=service_main,
services=services,
synopsis=synopsis,
synopsis_list=synopsis_list,
source_key=source_key,
),
errors,
)


Expand Down Expand Up @@ -284,24 +280,22 @@ def idFormat(id: str, doc_gen: DocGen) -> bool:

def parse(
file: str, yaml: dict[str, any], doc_gen: DocGen
) -> list[Example] | MetadataErrors:
) -> (list[Example], MetadataErrors):
examples: list[Example] = []
errors = MetadataErrors()
for id in yaml:
if not idFormat(id, doc_gen):
errors.append(metadata_errors.NameFormat(file=file, id=id))
example = Example.from_yaml(yaml[id], doc_gen)
if isinstance(example, Example):
example.file = file
example.id = id
examples.append(example)
else:
for error in example:
error.file = file
error.id = id
errors.append(error)

return examples if len(errors) == 0 else errors
example, example_errors = Example.from_yaml(yaml[id], doc_gen)
for error in example_errors:
error.file = file
error.id = id
errors.extend(example_errors)
example.file = file
example.id = id
examples.append(example)

return examples, errors


if __name__ == "__main__":
Expand Down
7 changes: 4 additions & 3 deletions .tools/validation/metadata_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,14 @@ def __init__(self, item: MetadataParseError):
class MetadataErrors:
"""MyPy isn't catching list[Foo].append(list[Foo])"""

def __init__(self):
def __init__(self, no_duplicates=False):
self.no_duplicates = no_duplicates
self._errors: list[MetadataError] = []

def append(self, item: MetadataError):
if not isinstance(item, MetadataError):
raise InvalidItemException(item)
if item in self._errors:
if self.no_duplicates and item in self._errors:
raise DuplicateItemException(item)
self._errors.append(item)

Expand All @@ -98,7 +99,7 @@ def __repr__(self) -> str:
return repr(self._errors)

def __str__(self) -> str:
errs = "\n".join([f"\t{err!r}" for err in self])
errs = "\n".join([f"\t{err}" for err in self])
return f"ExampleErrors with {len(self)} errors:\n{errs}"


Expand Down
14 changes: 9 additions & 5 deletions .tools/validation/metadata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def load(path: Path, doc_gen: DocGen) -> list[Example] | metadata_errors.Metadat

def test_parse():
meta = yaml.safe_load(GOOD_SINGLE_CPP)
parsed = parse("test_cpp.yaml", meta, DOC_GEN)
parsed, errors = parse("test_cpp.yaml", meta, DOC_GEN)
assert len(errors) == 0
assert parsed == [
Example(
file="test_cpp.yaml",
Expand Down Expand Up @@ -118,7 +119,8 @@ def test_parse():

def test_parse_cross():
meta = yaml.safe_load(CROSS_META)
actual = parse("cross.yaml", meta, DOC_GEN)
actual, errors = parse("cross.yaml", meta, DOC_GEN)
assert len(errors) == 0
assert actual == [
Example(
file="cross.yaml",
Expand Down Expand Up @@ -159,7 +161,8 @@ def test_parse_cross():

def test_parse_curated():
meta = yaml.safe_load(CURATED)
actual = parse("curated.yaml", meta, DOC_GEN)
actual, errors = parse("curated.yaml", meta, DOC_GEN)
assert len(errors) == 0
assert actual == [
Example(
id="autogluon_tabular_with_sagemaker_pipelines",
Expand All @@ -180,7 +183,8 @@ def test_parse_curated():


def test_verify_load_successful():
examples = load("valid_metadata.yaml", DOC_GEN)
examples, errors = load("valid_metadata.yaml", DOC_GEN)
assert len(errors) == 0
assert examples == [
Example(
file="valid_metadata.yaml",
Expand Down Expand Up @@ -370,7 +374,7 @@ def test_verify_load_successful():
],
)
def test_common_errors(filename, expected_errors):
actual = load(filename, DOC_GEN)
_, actual = load(filename, DOC_GEN)
assert expected_errors == actual._errors


Expand Down
73 changes: 2 additions & 71 deletions .tools/validation/project_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import os
import re
import argparse
import logging
import sys
from dataclasses import dataclass, field
Expand Down Expand Up @@ -54,7 +53,6 @@ def check_files(root: Path, errors: MetadataErrors):
verify_no_deny_list_words(file_contents, file_path, errors)
verify_no_secret_keys(file_contents, file_path, errors)
verify_no_secret_keys(file_contents, file_path, errors)
verify_snippet_start_end(file_contents, file_path, errors)

print(f"{file_count} files scanned in {root}.\n")

Expand Down Expand Up @@ -180,78 +178,11 @@ def verify_no_secret_keys(
errors.append(PossibleSecretKey(file=str(file_location), word=word))


@dataclass
class SnippetParseError(MetadataParseError):
    """Common base for errors about snippet tags found in a scanned file."""

    # Text of the snippet tag the error refers to; empty unless supplied.
    tag: str = ""


@dataclass
class DuplicateSnippetTagInFile(SnippetParseError):
    """Error for a snippet tag that is started again before its matching end."""

    def message(self) -> str:
        """Return the human-readable description of this error."""
        tag = self.tag
        return f"Duplicate tag {tag}"


@dataclass
class SnippetNoMatchingStart(SnippetParseError):
    """Error for a snippet end tag that has no open start tag before it."""

    def message(self) -> str:
        """Return the human-readable description of this error."""
        tag = self.tag
        return f"No matching start for {tag}"


@dataclass
class SnippetNoMatchingEnd(SnippetParseError):
    """Error for a snippet start tag that is never closed in the file."""

    def message(self) -> str:
        """Return the human-readable description of this error."""
        tag = self.tag
        return f"No matching end for {tag}"


# TODO move this to snippets
def verify_snippet_start_end(
    file_contents: str, file_location: Path, errors: MetadataErrors
):
    """Check that snippet-start and snippet-end tags in a file pair up.

    The contents are split on whitespace and each word is inspected for a
    start or end marker. Problems are appended to ``errors``; nothing is
    returned.

    Args:
        file_contents: Full text of the file being checked.
        file_location: Path of the file, recorded on every error.
        errors: Collection that receives the errors found:
            DuplicateSnippetTagInFile when a tag is started while it is
            already open, SnippetNoMatchingStart when an end tag has no open
            start, and SnippetNoMatchingEnd for each tag still open once the
            whole file has been scanned.
    """
    # NOTE(review): the markers are presumably built by concatenation so this
    # source file never contains a literal marker itself -- confirm.
    snippet_start = "snippet" + "-start:["
    snippet_end = "snippet" + "-end:["
    open_tags = set()
    for word in file_contents.split():
        if snippet_start in word:
            # Take the text after the marker up to the closing bracket, so
            # the tag excludes "]" and any trailing comment characters.
            tag = word.partition(snippet_start)[2].partition("]")[0]
            if tag in open_tags:
                errors.append(DuplicateSnippetTagInFile(file=file_location, tag=tag))
            else:
                open_tags.add(tag)
        elif snippet_end in word:
            tag = word.partition(snippet_end)[2].partition("]")[0]
            if tag in open_tags:
                open_tags.remove(tag)
            else:
                errors.append(SnippetNoMatchingStart(file=file_location, tag=tag))

    # Sorted so the reported order does not depend on set iteration order.
    for tag in sorted(open_tags):
        errors.append(SnippetNoMatchingEnd(file=file_location, tag=tag))


def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--quiet",
action="store_true",
help="Suppresses output of filenames while parsing. " "The default is False.",
)
parser.add_argument(
"--root",
help="The root path from which to search for files "
"to check. The default is the current working "
"folder.",
)
args = parser.parse_args()

root_path = Path(
os.path.abspath(".") if not args.root else os.path.abspath(args.root)
)

root_path = Path(__file__).parent.parent.parent
print("----------\n\nRun Tests\n")
errors = MetadataErrors()
check_files(root_path, args.quiet, errors)
check_files(root_path, errors)
verify_sample_files(root_path, errors)
error_count = len(errors)
if error_count > 0:
Expand Down
Loading

0 comments on commit b149462

Please sign in to comment.