Skip to content

Commit

Permalink
add ability to external project parsers to skip gap analysis and embe…
Browse files Browse the repository at this point in the history
…dding calculation
  • Loading branch information
northdpole committed Jun 19, 2024
1 parent 04afcdb commit 53b5fb2
Show file tree
Hide file tree
Showing 25 changed files with 127 additions and 69 deletions.
3 changes: 2 additions & 1 deletion application/cmd/cre_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def register_standard(
standard_entries: List[defs.Standard],
collection: db.Node_collection,
generate_embeddings=True,
calculate_gap_analysis=True,
db_connection_str: str = "",
):
if os.environ.get("CRE_NO_GEN_EMBEDDINGS"):
Expand Down Expand Up @@ -265,7 +266,7 @@ def register_standard(
# calculate gap analysis
jobs = []
pending_stadards = collection.standards()
if not os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"):
if calculate_gap_analysis and not os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"):
for standard_name in pending_stadards:
if standard_name == importing_name:
continue
Expand Down
2 changes: 1 addition & 1 deletion application/tests/capec_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class fakeRequest:
version="3.7",
),
]
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, capec_parser.Capec().name)
self.assertEqual(len(nodes), 2)
self.assertCountEqual(nodes[0].todict(), expected[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/ccmv4_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
# version="v4.0",
# ),
# ]
# for name, nodes in entries.items():
# for name, nodes in entries.results.items():
# self.assertEqual(name, ccmv4.CloudControlsMatrix().name)
# self.assertEqual(len(nodes), 2)
# self.assertCountEqual(nodes[0].todict(), expected[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/cheatsheets_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Repo:
links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)],
)
self.maxDiff = None
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, cheatsheets_parser.Cheatsheets().name)
self.assertEqual(len(nodes), 1)
self.assertCountEqual(expected.todict(), nodes[0].todict())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class fakeRequest:
version="CNSWP v1.0",
),
]
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(
name, cloud_native_security_controls.CloudNativeSecurityControls().name
)
Expand Down
2 changes: 1 addition & 1 deletion application/tests/cwe_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def iter_content(self, chunk_size=None):
],
),
]
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, cwe.CWE().name)
self.assertEqual(len(nodes), 2)
self.assertCountEqual(nodes[0].todict(), expected[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/dsomm_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class fakeRequest:
subsection="Defined build process",
),
]
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, dsomm.DSOMM().name)
self.assertEqual(len(nodes), 2)
self.assertCountEqual(nodes[0].todict(), expected[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/juiceshop_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class fakeRequest:
tooltype=defs.ToolTypes.Training,
),
]
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, juiceshop.JuiceShop().name)
self.assertEqual(len(nodes), 2)
self.assertCountEqual(nodes[0].todict(), expected[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/misc_tools_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_document_todict(
entries = misc_tools_parser.MiscTools().parse(
cache=collection, ph=PromptHandler(database=self.collection)
)
for name, tools in entries.items():
for name, tools in entries.results.items():
self.assertEqual(name, "OWASP WrongSecrets")
self.assertEqual(len(tools), 1)
self.assertCountEqual(expected.todict(), tools[0].todict())
Expand Down
2 changes: 1 addition & 1 deletion application/tests/secure_headers_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Repo:
section="headerAsection",
links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)],
)
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, secure_headers.SecureHeaders().name)

self.maxDiff = None
Expand Down
4 changes: 2 additions & 2 deletions application/tests/zap_alerts_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_register_zap_alert_top_10_tags(self, mock_git) -> None:
cache=self.collection,
ph=prompt_client.PromptHandler(database=self.collection),
)
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, zap_alerts_parser.ZAP().name)
expected = defs.Tool(
name="ZAP Rule",
Expand Down Expand Up @@ -205,7 +205,7 @@ def test_register_zap_alert_cwe(self, mock_git) -> None:
],
)
self.maxDiff = None
for name, nodes in entries.items():
for name, nodes in entries.results.items():
self.assertEqual(name, zap_alerts_parser.ZAP().name)
self.assertEqual(len(nodes), 1)
self.assertCountEqual(expected.todict(), nodes[0].todict())
39 changes: 11 additions & 28 deletions application/utils/external_project_parsers/base_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from application.database import db
from application.defs import cre_defs as defs
from application.utils.external_project_parsers import base_parser_defs
from rq import Queue
from application.utils import redis
from typing import List, Dict, Optional
from application.prompt_client import prompt_client as prompt_client
import logging
import time
Expand All @@ -15,32 +13,12 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Abstract class/interface that shows how to import a project that is neither CRE itself nor one of its core resources


class ParserInterface(object):
    """Contract implemented by every external-project parser.

    Implementations are discovered through ``ParserInterface.__subclasses__()``,
    so a parser only has to subclass this class, set ``name`` and override
    ``parse``.
    """

    # The name of the resource being parsed
    name: str

    def parse(
        self,
        database: db.Node_collection,
        prompt_client: Optional[prompt_client.PromptHandler],
    ) -> Dict[str, List[defs.Document]]:
        """
        Parses the resources of a project and links them to CREs.

        Linking can be done using glue resources, AI or any other supported
        method.

        Args:
            database: the node collection the parser may read from.
            prompt_client: optional prompt handler, for parsers that rely on
                AI to link resources.

        Returns:
            A dict whose key is the name of the resource being imported and
            whose value is the list of documents with CRE links, optionally
            with their embeddings filled in.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        # ``self`` is declared explicitly so that calling an un-overridden
        # ``parse`` on an instance fails with NotImplementedError rather than
        # with a confusing positional-argument TypeError.
        raise NotImplementedError


class BaseParser:
@classmethod
def register_resource(
self,
sclass: ParserInterface,
sclass: base_parser_defs.ParserInterface,
db_connection_str: str,
):
from application.cmd import cre_main
Expand All @@ -58,10 +36,15 @@ def register_resource(
)
return

result = sclass_instance.parse(db, ph)
resultObj = sclass_instance.parse(db, ph)
try:
for _, documents in result.items():
cre_main.register_standard(documents, db)
for _, documents in resultObj.results.items():
cre_main.register_standard(
standard_entries=documents,
db_connection_str=db,
calculate_gap_analysis=resultObj.calculate_gap_analysis,
generate_embeddings=resultObj.calculate_embeddings,
)
except ValueError as ve:
err_str = f"error importing {sclass.name}, err: {ve}"
raise ValueError(err_str)
Expand All @@ -79,7 +62,7 @@ def call_importers(self, db_connection_str: str):
if os.environ.get("CRE_IMPORTERS_IMPORT_ONLY"):
import_only = json.loads(os.environ.get("CRE_IMPORTERS_IMPORT_ONLY"))

for subclass in ParserInterface.__subclasses__():
for subclass in base_parser_defs.ParserInterface.__subclasses__():
if import_only and subclass.name not in import_only:
continue

Expand Down
31 changes: 31 additions & 0 deletions application/utils/external_project_parsers/base_parser_defs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List, Dict, Optional
from dataclasses import dataclass
from application.defs import cre_defs as defs
from application.prompt_client import prompt_client as prompt_client
from application.database import db

# Abstract class/interface that shows how to import a project that is neither CRE itself nor one of its core resources


@dataclass
class ParseResult(object):
    """Outcome of a parser run plus flags for the registration step.

    ``results`` maps the name of the imported resource to the documents
    parsed for it. The two booleans are forwarded by the importer to
    ``cre_main.register_standard`` so that an individual parser can opt out
    of gap analysis and/or embedding generation.
    """

    # resource name -> parsed documents; a missing/None value becomes {}
    results: Optional[Dict[str, List[defs.Document]]] = None
    # forwarded as ``calculate_gap_analysis`` when registering the documents
    calculate_gap_analysis: bool = True
    # forwarded as ``generate_embeddings`` when registering the documents
    calculate_embeddings: bool = True

    def __post_init__(self):
        # Consumers iterate ``results.items()`` unconditionally, so make sure
        # it is always a dict. A fresh dict is created per instance to avoid
        # sharing mutable state between results.
        if self.results is None:
            self.results = {}

class ParserInterface(object):
    """Contract implemented by every external-project parser.

    Implementations are discovered through ``ParserInterface.__subclasses__()``,
    so a parser only has to subclass this class, set ``name`` and override
    ``parse``.
    """

    # The name of the resource being parsed
    name: str

    def parse(
        self,
        database: db.Node_collection,
        prompt_client: Optional[prompt_client.PromptHandler],
    ) -> ParseResult:
        """
        Parses the resources of a project and links them to CREs.

        Linking can be done using glue resources, AI or any other supported
        method.

        Args:
            database: the node collection the parser may read from.
            prompt_client: optional prompt handler, for parsers that rely on
                AI to link resources.

        Returns:
            A ParseResult whose ``results`` dict has the name of the resource
            being imported as key and the list of documents with CRE links
            (optionally with their embeddings filled in) as value. Its
            ``calculate_gap_analysis`` and ``calculate_embeddings`` flags let
            the parser skip those steps during registration.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        # ``self`` is declared explicitly so that calling an un-overridden
        # ``parse`` on an instance fails with NotImplementedError rather than
        # with a confusing positional-argument TypeError.
        raise NotImplementedError
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

from application.utils.external_project_parsers.base_parser import ParserInterface
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
from application.prompt_client import prompt_client as prompt_client


Expand All @@ -22,7 +25,11 @@ class Capec(ParserInterface):
def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler):
xml = requests.get(self.capec_xml)
if xml.status_code == 200:
return {self.name: self.register_capec(xml_contents=xml.text, cache=cache)}
return ParseResult(
results={
self.name: self.register_capec(xml_contents=xml.text, cache=cache)
}
)
else:
logger.fatal(f"Could not get CAPEC's XML data, error was {xml.text}")

Expand Down
5 changes: 4 additions & 1 deletion application/utils/external_project_parsers/parsers/ccmv4.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

from application.utils.external_project_parsers.base_parser import ParserInterface
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
from application.prompt_client import prompt_client as prompt_client
from application.utils import spreadsheet as sheet_utils

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from application.defs import cre_defs as defs
import os
import re
from application.utils.external_project_parsers.base_parser import ParserInterface
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
from application.prompt_client import prompt_client as prompt_client


Expand All @@ -29,7 +32,7 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler):
cheatsheets = self.register_cheatsheets(
repo=repo, cache=cache, cheatsheets_path=cheatsheets_path, repo_path=c_repo
)
return {self.name: cheatsheets}
return ParseResult(results={self.name: cheatsheets})

def register_cheatsheets(
self, cache: db.Node_collection, repo, cheatsheets_path, repo_path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from application.database import db
from application.defs import cre_defs as defs
from application.prompt_client import prompt_client as prompt_client
from application.utils.external_project_parsers.base_parser import ParserInterface
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
import requests

logging.basicConfig()
Expand Down Expand Up @@ -79,4 +82,4 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler):
f"stored {cnsc.__repr__()} but could not link it to any CRE reliably"
)
standard_entries.append(cnsc)
return {self.name: standard_entries}
return ParseResult(results={self.name: standard_entries})
18 changes: 12 additions & 6 deletions application/utils/external_project_parsers/parsers/cwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
from application.defs import cre_defs as defs
import shutil
import xmltodict
from application.utils.external_project_parsers.base_parser import ParserInterface
from application.prompt_client import prompt_client
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)

logging.basicConfig()
logger = logging.getLogger(__name__)
Expand All @@ -33,11 +36,14 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler):
for _, _, files in os.walk(tmp_dir, topdown=False):
for file in files:
if file.startswith("cwe") and file.endswith(".xml"):
return {
self.name: self.register_cwe(
xml_file=os.path.join(tmp_dir, file), cache=cache
),
}
return ParseResult(
results={
self.name: self.register_cwe(
xml_file=os.path.join(tmp_dir, file), cache=cache
),
},
calculate_gap_analysis=False,
)
raise RuntimeError("there is no file named cwe.xml in the target zip")

def make_hyperlink(self, cwe_id: int):
Expand Down
7 changes: 5 additions & 2 deletions application/utils/external_project_parsers/parsers/dsomm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
from application.database import db
from application.defs import cre_defs as defs
from application.prompt_client import prompt_client as prompt_client
from application.utils.external_project_parsers.base_parser import ParserInterface
import requests
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)

logging.basicConfig()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -150,4 +153,4 @@ def parse(
# use iso as glue
standard = self.link_to_iso(aname, activity, cache, standard)
standard_entries.append(standard)
return {self.name: standard_entries}
return ParseResult(results={self.name: standard_entries})
11 changes: 7 additions & 4 deletions application/utils/external_project_parsers/parsers/iso27001.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
from simplify_docx import simplify
import docx
import tempfile
from application.utils.external_project_parsers.base_parser import ParserInterface
from application.prompt_client import prompt_client as prompt_client
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
from typing import List

logging.basicConfig()
Expand Down Expand Up @@ -77,9 +80,9 @@ class ISO27001(ParserInterface):
# return nist_table

def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler):
return {
self.name: []
} # the doc above does not have names we get the names from the spreadsheet for now, disable
return ParseResult(
results={self.name: []}
) # the doc above does not have names we get the names from the spreadsheet for now, disable
# url = self.url
# documents: List[defs.Standard] = []
# nist_nodes = cache.get_nodes(name="NIST 800-53 v5")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from application.defs import cre_defs as defs
import re
from application.prompt_client import prompt_client as prompt_client
from application.utils.external_project_parsers.base_parser import ParserInterface
from application.utils.external_project_parsers.base_parser_defs import (
ParserInterface,
ParseResult,
)
import requests

logging.basicConfig()
Expand Down Expand Up @@ -94,4 +97,4 @@ def parse(
f"stored {chal.section} but could not link it to any CRE reliably"
)
chals.append(chal)
return {self.name: chals}
return ParseResult(results={self.name: chals})
Loading

0 comments on commit 53b5fb2

Please sign in to comment.