Skip to content

Commit

Permalink
Merge pull request #355 from OWASP/gap_analysis
Browse files Browse the repository at this point in the history
Gap analysis
  • Loading branch information
john681611 authored Sep 9, 2023
2 parents 79473fb + b7ccc6e commit 85b347d
Show file tree
Hide file tree
Showing 16 changed files with 1,127 additions and 183 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@ yarn-error.log
coverage/

### Dev db
standards_cache.sqlite
standards_cache.sqlite

### Neo4j
neo4j/
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ docker:
docker-run:
docker run -it -p 5000:5000 opencre:$(shell git rev-parse HEAD)

docker-neo4j:
docker run --env NEO4J_PLUGINS='["apoc"]' --volume=./neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j

lint:
[ -d "./venv" ] && . ./venv/bin/activate && black . && yarn lint

Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ To run the web application for development you can run
Alternatively, you can use the dockerfile with
<pre>make docker && make docker-run</pre>

Some features, such as Gap Analysis, require a running Neo4j database; you can start one with
<pre>make docker-neo4j</pre>
Environment variables the app uses to connect to the Neo4j DB (defaults in parentheses):
- NEO4J_URI (neo4j://localhost:7687)
- NEO4J_USR (neo4j)
- NEO4J_PASS (password)

To run the web application for production you need gunicorn and you can run from within the cre_sync dir
<pre>make prod-run</pre>

Expand Down
269 changes: 243 additions & 26 deletions application/database/db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from neo4j import GraphDatabase
import neo4j
from sqlalchemy.orm import aliased
import os
import logging
Expand All @@ -15,6 +17,8 @@
from sqlalchemy.sql.expression import desc # type: ignore
import uuid

from application.utils.gap_analysis import get_path_score

from .. import sqla # type: ignore

logging.basicConfig()
Expand Down Expand Up @@ -156,14 +160,217 @@ class Embeddings(BaseModel): # type: ignore
)


class NEO_DB:
    """Singleton-style gateway to the Neo4j database used for gap analysis.

    All state (``driver``, ``connected``) is stored on the class and every
    operation is a classmethod. ``connected`` only becomes ``True`` once
    ``instance()`` has verified connectivity; until then every operation
    returns early without touching the driver.
    """

    __instance = None

    driver = None
    connected = False

    # Shared by link_CRE_to_CRE / link_CRE_to_Node; %s is the label of the
    # target node ("CRE" or "Node"), never caller-supplied data.
    _LINK_QUERY = (
        "MATCH (a:CRE), (b:%s) "
        "WHERE a.id = $aID AND b.id = $bID "
        "CALL apoc.create.relationship(a,$relType, {},b) "
        "YIELD rel "
        "RETURN rel"
    )

    @classmethod
    def instance(cls):
        """Return the shared instance, creating the driver on first use.

        Connection settings come from NEO4J_URI, NEO4J_USR and NEO4J_PASS,
        falling back to ``neo4j://localhost:7687``, ``neo4j`` and
        ``password``. If the connectivity check raises ServiceUnavailable the
        error is logged and ``connected`` stays ``False``.
        """
        if cls.__instance is None:
            # __new__ is used directly because __init__ always raises.
            cls.__instance = cls.__new__(cls)

            uri = os.getenv("NEO4J_URI") or "neo4j://localhost:7687"
            auth = (
                os.getenv("NEO4J_USR") or "neo4j",
                os.getenv("NEO4J_PASS") or "password",
            )
            cls.driver = GraphDatabase.driver(uri, auth=auth)

            try:
                cls.driver.verify_connectivity()
                cls.connected = True
            except neo4j.exceptions.ServiceUnavailable:
                logger.error(
                    "NEO4J ServiceUnavailable error - disabling neo4j related features"
                )

        return cls.__instance

    def __init__(self):
        """Reject direct construction; callers must go through instance()."""
        raise ValueError("NEO_DB is a singleton, please call instance() instead")

    @staticmethod
    def _relationship_type(link_type):
        """Turn a link type such as "Linked To" into "LINKED_TO"."""
        return str.upper(link_type).replace(" ", "_")

    @staticmethod
    def _format_node(node):
        """Project a graph node (or record value) onto the API's node dict."""
        return {
            "name": node["name"],
            "sectionID": node["section_id"],
            "section": node["section"],
            "subsection": node["subsection"],
            "description": node["description"],
            "id": node["id"],
        }

    @classmethod
    def _link(cls, target_label, source_id, target_id, link_type):
        """Create a relationship from a CRE to a node labelled target_label."""
        cls.driver.execute_query(
            cls._LINK_QUERY % target_label,
            aID=source_id,
            bID=target_id,
            relType=cls._relationship_type(link_type),
            database_="neo4j",
        )

    @classmethod
    def add_cre(cls, dbcre: "CRE"):
        """MERGE a ``CRE`` graph node for dbcre; no-op when not connected."""
        if not cls.connected:
            return
        cls.driver.execute_query(
            "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})",
            nid=dbcre.id,
            name=dbcre.name,
            description=dbcre.description,
            external_id=dbcre.external_id,
            database_="neo4j",
        )

    @classmethod
    def add_dbnode(cls, dbnode: "Node"):
        """MERGE a ``Node`` graph node for dbnode; no-op when not connected.

        ``subsection`` and ``version`` are sent as "" when falsy; the other
        properties are passed through unchanged.
        """
        if not cls.connected:
            return
        cls.driver.execute_query(
            "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})",
            nid=dbnode.id,
            name=dbnode.name,
            section=dbnode.section,
            section_id=dbnode.section_id,
            subsection=dbnode.subsection or "",
            tags=dbnode.tags,
            version=dbnode.version or "",
            description=dbnode.description,
            ntype=dbnode.ntype,
            database_="neo4j",
        )

    @classmethod
    def link_CRE_to_CRE(cls, id1, id2, link_type):
        """Relate CRE id1 to CRE id2 with link_type; no-op when not connected."""
        if not cls.connected:
            return
        cls._link("CRE", id1, id2, link_type)

    @classmethod
    def link_CRE_to_Node(cls, CRE_id, node_id, link_type):
        """Relate CRE CRE_id to Node node_id; no-op when not connected."""
        if not cls.connected:
            return
        cls._link("Node", CRE_id, node_id, link_type)

    @classmethod
    def gap_analysis(cls, name_1, name_2):
        """Collect the nodes named name_1 and the paths from them to name_2.

        Returns a ``(base_nodes, paths)`` tuple, or ``(None, None)`` when not
        connected. ``base_nodes`` holds one formatted dict per ``Node`` named
        name_1. ``paths`` holds one dict per returned path with its formatted
        ``start`` and ``end`` nodes and a ``path`` list of segments; results
        of the LINKED_TO/CONTAINS-only query come first, followed by those of
        the query that allows any relationship type.
        """
        if not cls.connected:
            return None, None
        base_standard, _, _ = cls.driver.execute_query(
            """
            MATCH (BaseStandard:Node {name: $name1})
            RETURN BaseStandard
            """,
            name1=name_1,
            database_="neo4j",
        )

        path_records_all, _, _ = cls.driver.execute_query(
            """
            OPTIONAL MATCH (BaseStandard:Node {name: $name1})
            OPTIONAL MATCH (CompareStandard:Node {name: $name2})
            OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard))
            WITH p
            WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2)
            RETURN p
            """,
            name1=name_1,
            name2=name_2,
            database_="neo4j",
        )
        path_records, _, _ = cls.driver.execute_query(
            """
            OPTIONAL MATCH (BaseStandard:Node {name: $name1})
            OPTIONAL MATCH (CompareStandard:Node {name: $name2})
            OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard))
            WITH p
            WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2)
            RETURN p
            """,
            name1=name_1,
            name2=name_2,
            database_="neo4j",
        )

        def format_segment(seg):
            return {
                "start": cls._format_node(seg.start_node),
                "end": cls._format_node(seg.end_node),
                "relationship": seg.type,
            }

        def format_path_record(rec):
            return {
                "start": cls._format_node(rec.start_node),
                "end": cls._format_node(rec.end_node),
                "path": [format_segment(seg) for seg in rec.relationships],
            }

        return [cls._format_node(rec["BaseStandard"]) for rec in base_standard], [
            format_path_record(rec["p"]) for rec in (path_records + path_records_all)
        ]

    @classmethod
    def standards(cls):
        """Return the distinct names of nodes whose ntype is "Standard".

        Returns ``None`` when not connected.
        """
        if not cls.connected:
            return
        records, _, _ = cls.driver.execute_query(
            'MATCH (n:Node {ntype: "Standard"}) ' "RETURN collect(distinct n.name)",
            database_="neo4j",
        )
        return records[0][0]


class CRE_Graph:
graph: nx.Graph = None
neo_db: NEO_DB = None
__instance = None

@classmethod
def instance(cls, session, neo_db: "NEO_DB"):
    """Return the shared CRE_Graph, building it on the first call.

    On first use the supplied neo_db is stored on the class and the graph is
    loaded from session; later calls return the existing instance and ignore
    both arguments.
    """
    if cls.__instance is None:
        cls.__instance = cls.__new__(cls)
        # Must be assigned before load_cre_graph(), which writes links
        # through cls.neo_db while it builds the graph.
        cls.neo_db = neo_db
        cls.graph = cls.load_cre_graph(session)
    return cls.__instance

Expand All @@ -179,6 +386,7 @@ def add_node(self, *args, **kwargs):
@classmethod
def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph:
if dbcre:
cls.neo_db.add_cre(dbcre)
graph.add_node(
f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id
)
Expand All @@ -189,6 +397,9 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph:
@classmethod
def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph:
if dbnode:
cls.neo_db.add_dbnode(dbnode)
# coma separated tags

graph.add_node(
"Node: " + str(dbnode.id),
internal_id=dbnode.id,
Expand All @@ -215,6 +426,7 @@ def load_cre_graph(cls, session) -> nx.Graph:
graph = cls.add_cre(dbcre=cre, graph=graph)

graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type)
cls.neo_db.link_CRE_to_CRE(il.group, il.cre, il.type)

for lnk in session.query(Links).all():
node = session.query(Node).filter(Node.id == lnk.node).first()
Expand All @@ -226,16 +438,19 @@ def load_cre_graph(cls, session) -> nx.Graph:
graph = cls.add_cre(dbcre=cre, graph=graph)

graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type)
cls.neo_db.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type)
return graph


class Node_collection:
graph: nx.Graph = None
neo_db: NEO_DB = None
session = sqla.session

def __init__(self) -> None:
    """Bind the SQLAlchemy session and, unless disabled, load the graphs.

    When the NO_LOAD_GRAPH environment variable is set to a non-empty value,
    neither the Neo4j handle nor the CRE graph is created and both
    attributes keep their class-level default of None.
    """
    if not os.environ.get("NO_LOAD_GRAPH"):
        # The Neo4j handle has to exist first: CRE_Graph.instance() takes it
        # as its second argument.
        self.neo_db = NEO_DB.instance()
        self.graph = CRE_Graph.instance(sqla.session, self.neo_db)
    self.session = sqla.session

def __get_external_links(self) -> List[Tuple[CRE, Node, str]]:
Expand Down Expand Up @@ -1059,30 +1274,32 @@ def find_path_between_nodes(

return res

def gap_analysis(self, node_names: List[str]) -> List[cre_defs.Node]:
    """Link every matching node to each other matching node it can reach.

    All database nodes whose name appears in node_names are loaded. Each one
    is converted with nodeFromDB() and receives a LinkedTo link to every
    other loaded node for which find_path_between_nodes() reports a path.
    Returns the converted nodes in load order.
    """
    candidates: List[Node] = [
        row
        for wanted in node_names
        for row in self.session.query(Node).filter(Node.name == wanted).all()
    ]

    results = []
    for source in candidates:
        current = nodeFromDB(source)
        for target in candidates:
            # A node is never linked to itself.
            if source.id != target.id and self.find_path_between_nodes(
                source.id, target.id
            ):
                link = cre_defs.Link(
                    ltype=cre_defs.LinkTypes.LinkedTo,
                    document=nodeFromDB(target),
                )
                current.add_link(link)
        results.append(current)
    return results
def gap_analysis(self, node_names: List[str]):
    """Group the best-scoring paths between two standards by start node.

    node_names[0] is the base standard and node_names[1] the standard it is
    compared against. Returns None when no Neo4j handle is available, when
    it is not connected, or when the query yields no base result. Otherwise
    returns a dict keyed by base-node id:
    ``{id: {"start": node, "paths": {end_id: path}}}``, where each path has
    a ``score`` added and its ``start`` entry removed. For each end node
    only the path with the lowest score is kept; on a tie the first one seen
    wins.
    """
    # neo_db keeps its class default of None when the graph was never loaded.
    if self.neo_db is None or not self.neo_db.connected:
        return None
    base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1])
    if base_standard is None:
        return None

    grouped_paths = {}
    for node in base_standard:
        # First occurrence of an id wins, as before.
        grouped_paths.setdefault(node["id"], {"start": node, "paths": {}})

    for path in paths:
        key = path["start"]["id"]
        end_key = path["end"]["id"]
        # Score while the path is still complete, then drop the start node,
        # which is already stored once per group.
        path["score"] = get_path_score(path)
        del path["start"]
        candidates = grouped_paths[key]["paths"]
        best = candidates.get(end_key)
        if best is None or best["score"] > path["score"]:
            candidates[end_key] = path
    return grouped_paths

def standards(self):
    """Return whatever the Neo4j handle reports as the known standards.

    Returns None when no Neo4j handle was created for this collection
    (neo_db keeps its class default of None in that case).
    """
    if self.neo_db is None:
        return None
    return self.neo_db.standards()

def text_search(self, text: str) -> List[Optional[cre_defs.Document]]:
"""Given a piece of text, tries to find the best match
Expand Down
1 change: 1 addition & 0 deletions application/frontend/src/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ export const CRE = '/cre';
export const GRAPH = '/graph';
export const DEEPLINK = '/deeplink';
export const BROWSEROOT = '/root_cres';
export const GAP_ANALYSIS = '/gap_analysis';
Loading

0 comments on commit 85b347d

Please sign in to comment.