diff --git a/.gitignore b/.gitignore
index cbf1dd8c1..d6db6dd2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,4 +30,7 @@ yarn-error.log
 coverage/
 
 ### Dev db
-standards_cache.sqlite
\ No newline at end of file
+standards_cache.sqlite
+
+### Neo4j
+neo4j/
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 1e9f86ac7..5da2b61ea 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,9 @@ docker:
 
 docker-run:
 	docker run -it -p 5000:5000 opencre:$(shell git rev-parse HEAD)
 
+docker-neo4j:
+	docker run --env NEO4J_PLUGINS='["apoc"]' --volume=./neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j
+
 lint:
 	[ -d "./venv" ] && . ./venv/bin/activate && black . && yarn lint
diff --git a/README.md b/README.md
index 6ccf6a916..6e1925678 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,13 @@ To run the web application for development you can run
 
 Alternatively, you can use the dockerfile with
make docker && make docker-run
+Some features, like Gap Analysis, require a running neo4j DB; you can start one with
+
make docker-neo4j
+Environment variables used by the app to connect to the neo4j DB (defaults in parentheses):
+- NEO4J_URI (localhost)
+- NEO4J_USR (neo4j)
+- NEO4J_PASS (password)
+
 To run the web application for production you need gunicorn and you can run from within the cre_sync dir
make prod-run
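For reference, a minimal sketch of how the backend is expected to pick these variables up when it builds its Neo4j driver — this mirrors `NEO_DB.instance()` in `application/database/db.py` below, using the defaults listed above:

```python
import os

import neo4j
from neo4j import GraphDatabase

# Defaults mirror the README: NEO4J_URI, NEO4J_USR, NEO4J_PASS.
uri = os.getenv("NEO4J_URI") or "neo4j://localhost:7687"
auth = (os.getenv("NEO4J_USR") or "neo4j", os.getenv("NEO4J_PASS") or "password")

driver = GraphDatabase.driver(uri, auth=auth)
try:
    # Raises if the container started by `make docker-neo4j` is not reachable.
    driver.verify_connectivity()
except neo4j.exceptions.ServiceUnavailable:
    # db.py logs an error and disables the Neo4j-backed features in this case.
    print("Neo4j unreachable - gap analysis features will be disabled")
```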
diff --git a/application/database/db.py b/application/database/db.py index cf5c38da8..ed9eeec11 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,3 +1,5 @@ +from neo4j import GraphDatabase +import neo4j from sqlalchemy.orm import aliased import os import logging @@ -15,6 +17,8 @@ from sqlalchemy.sql.expression import desc # type: ignore import uuid +from application.utils.gap_analysis import get_path_score + from .. import sqla # type: ignore logging.basicConfig() @@ -156,14 +160,217 @@ class Embeddings(BaseModel): # type: ignore ) +class NEO_DB: + __instance = None + + driver = None + connected = False + + @classmethod + def instance(self): + if self.__instance is None: + self.__instance = self.__new__(self) + + URI = os.getenv("NEO4J_URI") or "neo4j://localhost:7687" + AUTH = ( + os.getenv("NEO4J_USR") or "neo4j", + os.getenv("NEO4J_PASS") or "password", + ) + self.driver = GraphDatabase.driver(URI, auth=AUTH) + + try: + self.driver.verify_connectivity() + self.connected = True + except neo4j.exceptions.ServiceUnavailable: + logger.error( + "NEO4J ServiceUnavailable error - disabling neo4j related features" + ) + + return self.__instance + + def __init__(sel): + raise ValueError("NEO_DB is a singleton, please call instance() instead") + + @classmethod + def add_cre(self, dbcre: CRE): + if not self.connected: + return + self.driver.execute_query( + "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", + nid=dbcre.id, + name=dbcre.name, + description=dbcre.description, + external_id=dbcre.external_id, + database_="neo4j", + ) + + @classmethod + def add_dbnode(self, dbnode: Node): + if not self.connected: + return + self.driver.execute_query( + "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", + nid=dbnode.id, + name=dbnode.name, + section=dbnode.section, + section_id=dbnode.section_id, + subsection=dbnode.subsection or "", + tags=dbnode.tags, + version=dbnode.version or "", + description=dbnode.description, + ntype=dbnode.ntype, + database_="neo4j", + ) + + @classmethod + def link_CRE_to_CRE(self, id1, id2, link_type): + if not self.connected: + return + self.driver.execute_query( + "MATCH (a:CRE), (b:CRE) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=id1, + bID=id2, + relType=str.upper(link_type).replace(" ", "_"), + database_="neo4j", + ) + + @classmethod + def link_CRE_to_Node(self, CRE_id, node_id, link_type): + if not self.connected: + return + self.driver.execute_query( + "MATCH (a:CRE), (b:Node) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=CRE_id, + bID=node_id, + relType=str.upper(link_type).replace(" ", "_"), + database_="neo4j", + ) + + @classmethod + def gap_analysis(self, name_1, name_2): + if not self.connected: + return None, None + base_standard, _, _ = self.driver.execute_query( + """ + MATCH (BaseStandard:Node {name: $name1}) + RETURN BaseStandard + """, + name1=name_1, + database_="neo4j", + ) + + path_records_all, _, _ = self.driver.execute_query( + """ + OPTIONAL MATCH (BaseStandard:Node {name: $name1}) + OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) + WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or 
n.name = $name1 or n.name = $name2) + RETURN p + """, + name1=name_1, + name2=name_2, + database_="neo4j", + ) + path_records, _, _ = self.driver.execute_query( + """ + OPTIONAL MATCH (BaseStandard:Node {name: $name1}) + OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) + RETURN p + """, + name1=name_1, + name2=name_2, + database_="neo4j", + ) + + def format_segment(seg): + return { + "start": { + "name": seg.start_node["name"], + "sectionID": seg.start_node["section_id"], + "section": seg.start_node["section"], + "subsection": seg.start_node["subsection"], + "description": seg.start_node["description"], + "id": seg.start_node["id"], + }, + "end": { + "name": seg.end_node["name"], + "sectionID": seg.end_node["section_id"], + "section": seg.end_node["section"], + "subsection": seg.end_node["subsection"], + "description": seg.end_node["description"], + "id": seg.end_node["id"], + }, + "relationship": seg.type, + } + + def format_path_record(rec): + return { + "start": { + "name": rec.start_node["name"], + "sectionID": rec.start_node["section_id"], + "section": rec.start_node["section"], + "subsection": rec.start_node["subsection"], + "description": rec.start_node["description"], + "id": rec.start_node["id"], + }, + "end": { + "name": rec.end_node["name"], + "sectionID": rec.end_node["section_id"], + "section": rec.end_node["section"], + "subsection": rec.end_node["subsection"], + "description": rec.end_node["description"], + "id": rec.end_node["id"], + }, + "path": [format_segment(seg) for seg in rec.relationships], + } + + def format_record(rec): + return { + "name": rec["name"], + "sectionID": rec["section_id"], + "section": rec["section"], + "subsection": rec["subsection"], + "description": rec["description"], + "id": rec["id"], + } + + return [format_record(rec["BaseStandard"]) for rec in base_standard], [ + format_path_record(rec["p"]) for rec in (path_records + path_records_all) + ] + + @classmethod + def standards(self): + if not self.connected: + return + records, _, _ = self.driver.execute_query( + 'MATCH (n:Node {ntype: "Standard"}) ' "RETURN collect(distinct n.name)", + database_="neo4j", + ) + return records[0][0] + + class CRE_Graph: graph: nx.Graph = None + neo_db: NEO_DB = None __instance = None @classmethod - def instance(cls, session): + def instance(cls, session, neo_db: NEO_DB): if cls.__instance is None: cls.__instance = cls.__new__(cls) + cls.neo_db = neo_db cls.graph = cls.load_cre_graph(session) return cls.__instance @@ -179,6 +386,7 @@ def add_node(self, *args, **kwargs): @classmethod def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: if dbcre: + cls.neo_db.add_cre(dbcre) graph.add_node( f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id ) @@ -189,6 +397,9 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: + cls.neo_db.add_dbnode(dbnode) + # coma separated tags + graph.add_node( "Node: " + str(dbnode.id), internal_id=dbnode.id, @@ -215,6 +426,7 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type) + cls.neo_db.link_CRE_to_CRE(il.group, il.cre, il.type) for lnk in session.query(Links).all(): node = 
session.query(Node).filter(Node.id == lnk.node).first() @@ -226,16 +438,19 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type) + cls.neo_db.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type) return graph class Node_collection: graph: nx.Graph = None + neo_db: NEO_DB = None session = sqla.session def __init__(self) -> None: if not os.environ.get("NO_LOAD_GRAPH"): - self.graph = CRE_Graph.instance(sqla.session) + self.neo_db = NEO_DB.instance() + self.graph = CRE_Graph.instance(sqla.session, self.neo_db) self.session = sqla.session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: @@ -1059,30 +1274,32 @@ def find_path_between_nodes( return res - def gap_analysis(self, node_names: List[str]) -> List[cre_defs.Node]: - """Since the CRE structure is a tree-like graph with - leaves being nodes we can find the paths between nodes - find_path_between_nodes() is a graph-path-finding method - """ - processed_nodes = [] - dbnodes: List[Node] = [] - for name in node_names: - dbnodes.extend(self.session.query(Node).filter(Node.name == name).all()) - - for node in dbnodes: - working_node = nodeFromDB(node) - for other_node in dbnodes: - if node.id == other_node.id: - continue - if self.find_path_between_nodes(node.id, other_node.id): - working_node.add_link( - cre_defs.Link( - ltype=cre_defs.LinkTypes.LinkedTo, - document=nodeFromDB(other_node), - ) - ) - processed_nodes.append(working_node) - return processed_nodes + def gap_analysis(self, node_names: List[str]): + if not self.neo_db.connected: + return None + base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) + if base_standard is None: + return None + grouped_paths = {} + for node in base_standard: + key = node["id"] + if key not in grouped_paths: + grouped_paths[key] = {"start": node, "paths": {}} + + for path in paths: + key = path["start"]["id"] + end_key = path["end"]["id"] + path["score"] = get_path_score(path) + del path["start"] + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: + grouped_paths[key]["paths"][end_key] = path + return grouped_paths + + def standards(self): + return self.neo_db.standards() def text_search(self, text: str) -> List[Optional[cre_defs.Document]]: """Given a piece of text, tries to find the best match diff --git a/application/frontend/src/const.ts b/application/frontend/src/const.ts index 231f78447..cc2afdfc8 100644 --- a/application/frontend/src/const.ts +++ b/application/frontend/src/const.ts @@ -36,3 +36,4 @@ export const CRE = '/cre'; export const GRAPH = '/graph'; export const DEEPLINK = '/deeplink'; export const BROWSEROOT = '/root_cres'; +export const GAP_ANALYSIS = '/gap_analysis'; diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx new file mode 100644 index 000000000..833407f23 --- /dev/null +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -0,0 +1,299 @@ +import axios from 'axios'; +import React, { useEffect, useState } from 'react'; +import { useLocation } from 'react-router-dom'; +import { + Accordion, + Button, + Container, + Dropdown, + DropdownItemProps, + Grid, + Icon, + Label, + Popup, + Table, +} from 'semantic-ui-react'; + +import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; +import { useEnvironment } from 
'../../hooks'; + +const GetSegmentText = (segment, segmentID) => { + let textPart = segment.end; + let nextID = segment.end.id; + let arrow = '->'; + if (segmentID !== segment.start.id) { + textPart = segment.start; + nextID = segment.start.id; + arrow = '<-'; + } + const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID ?? ''} ${ + textPart.section ?? '' + } ${textPart.subsection ?? ''} ${textPart.description ?? ''}`; + return { text, nextID }; +}; + +function useQuery() { + const { search } = useLocation(); + + return React.useMemo(() => new URLSearchParams(search), [search]); +} + +export const GapAnalysis = () => { + const standardOptionsDefault = [{ key: '', text: '', value: undefined }]; + const searchParams = useQuery(); + const [standardOptions, setStandardOptions] = useState( + standardOptionsDefault + ); + const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ''); + const [CompareStandard, setCompareStandard] = useState( + searchParams.get('compare') ?? '' + ); + const [gapAnalysis, setGapAnalysis] = useState(); + const [activeIndex, SetActiveIndex] = useState(); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const { apiUrl } = useEnvironment(); + + const GetStrength = (score) => { + if (score < 5) return 'Strong'; + if (score > 20) return 'Weak'; + return 'Average'; + }; + + const GetStrengthColor = (score) => { + if (score < 5) return 'Green'; + if (score > 20) return 'Red'; + return 'Orange'; + }; + + useEffect(() => { + const fetchData = async () => { + const result = await axios.get(`${apiUrl}/standards`); + setLoading(false); + setStandardOptions( + standardOptionsDefault.concat(result.data.sort().map((x) => ({ key: x, text: x, value: x }))) + ); + }; + + setLoading(true); + fetchData().catch((e) => { + setLoading(false); + setError(e.response.data.message ?? e.message); + }); + }, [setStandardOptions, setLoading, setError]); + + useEffect(() => { + const fetchData = async () => { + const result = await axios.get( + `${apiUrl}/gap_analysis?standard=${BaseStandard}&standard=${CompareStandard}` + ); + setLoading(false); + setGapAnalysis(result.data); + }; + + if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; + setGapAnalysis(undefined); + setLoading(true); + fetchData().catch((e) => { + setLoading(false); + setError(e.response.data.message ?? e.message); + }); + }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); + + const handleAccordionClick = (e, titleProps) => { + const { index } = titleProps; + const newIndex = activeIndex === index ? -1 : index; + SetActiveIndex(newIndex); + }; + + return ( +
+ + + + + + + + + + {gapAnalysis && ( + <> + + Generally: lower is better +
+ {GetStrength(0)}: Closely connected likely to have + majority overlap +
+ {GetStrength(6)}: Connected likely to have partial + overlap +
+ {GetStrength(22)}: Weakly connected likely to + have small or no overlap +
+
+ + + + + )} +
+ + {gapAnalysis && ( + + + + {BaseStandard} + {CompareStandard} + + + + + {Object.keys(gapAnalysis).map((key) => ( + + +

+ + {gapAnalysis[key].start.name} {gapAnalysis[key].start.section}{' '} + {gapAnalysis[key].start.subsection} + + + + {' '} +
+ {gapAnalysis[key].start.sectionID} + {gapAnalysis[key].start.description} +

+
+ + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(0, 3) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} + {path.end.description} ( + + {GetStrength(path.score)}:{path.score} + + ){' '} + + + + + } + /> +
+
+ ); + })} + {Object.keys(gapAnalysis[key].paths).length > 3 && ( + + + + + + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(3, gapAnalysis[key].paths.length) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section}{' '} + {path.end.subsection} {path.end.description}( + + {GetStrength(path.score)}:{path.score} + + ){' '} + + + + + } + /> +
+
+ ); + })} +
+
+ )} + {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} +
+
+ ))} +
+
+ )} +
+ ); +}; diff --git a/application/frontend/src/pages/chatbot/chatbot.tsx b/application/frontend/src/pages/chatbot/chatbot.tsx index 73eec06e7..03e657a5e 100644 --- a/application/frontend/src/pages/chatbot/chatbot.tsx +++ b/application/frontend/src/pages/chatbot/chatbot.tsx @@ -327,7 +327,7 @@ export const Chatbot = () => { -
OWASP Chat-CRE
+
OWASP OpenCRE Chat
@@ -388,7 +388,7 @@ export const Chatbot = () => {
- ChatCRE uses Google's PALM2 LLM, you can find the code for OpenCRE in + OpenCRE Chat uses Google's PALM2 LLM, you can find the code for OpenCRE in https://github.com/owaps/OpenCRE. Your question travels to Heroku (OpenCRE hosting provider) and then to GCP over a protected connection. Your data is never stored in the OpenCRE servers, you can start a new session by refreshing your page. The OpenCRE team has taken all diff --git a/application/frontend/src/routes.tsx b/application/frontend/src/routes.tsx index 876462503..548c2d7a3 100644 --- a/application/frontend/src/routes.tsx +++ b/application/frontend/src/routes.tsx @@ -1,10 +1,22 @@ import { ReactNode } from 'react'; -import { BROWSEROOT, CRE, DEEPLINK, GRAPH, INDEX, SEARCH, SECTION, SECTION_ID, STANDARD } from './const'; +import { + BROWSEROOT, + CRE, + DEEPLINK, + GAP_ANALYSIS, + GRAPH, + INDEX, + SEARCH, + SECTION, + SECTION_ID, + STANDARD, +} from './const'; import { CommonRequirementEnumeration, Graph, Search, Standard } from './pages'; import { BrowseRootCres } from './pages/BrowseRootCres/browseRootCres'; import { Chatbot } from './pages/chatbot/chatbot'; import { Deeplink } from './pages/Deeplink/Deeplink'; +import { GapAnalysis } from './pages/GapAnalysis/GapAnalysis'; import { MembershipRequired } from './pages/MembershipRequired/MembershipRequired'; import { SearchName } from './pages/Search/SearchName'; import { StandardSection } from './pages/Standard/StandardSection'; @@ -23,6 +35,12 @@ export const ROUTES: IRoute[] = [ showFilter: false, showHeader: false, }, + { + path: GAP_ANALYSIS, + component: GapAnalysis, + showHeader: true, + showFilter: false, + }, { path: `/node${STANDARD}/:id${SECTION}/:section`, component: StandardSection, diff --git a/application/frontend/src/scaffolding/Header/Header.tsx b/application/frontend/src/scaffolding/Header/Header.tsx index aa872fb43..c2652d80e 100644 --- a/application/frontend/src/scaffolding/Header/Header.tsx +++ b/application/frontend/src/scaffolding/Header/Header.tsx @@ -13,6 +13,10 @@ const getLinks = (): { to: string; name: string }[] => [ to: `/`, name: 'Open CRE', }, + { + to: `/gap_analysis`, + name: 'Gap Analysis', + }, ]; export const Header = () => { diff --git a/application/frontend/src/scaffolding/Header/header.scss b/application/frontend/src/scaffolding/Header/header.scss index e01e85568..faec51d53 100644 --- a/application/frontend/src/scaffolding/Header/header.scss +++ b/application/frontend/src/scaffolding/Header/header.scss @@ -20,6 +20,7 @@ padding-top: 10px; padding-bottom: 10px; text-align: center; + margin: 0 2px; .item { color: white !important; diff --git a/application/prompt_client/vertex_prompt_client.py b/application/prompt_client/vertex_prompt_client.py index 4a05b545c..28f625e6e 100644 --- a/application/prompt_client/vertex_prompt_client.py +++ b/application/prompt_client/vertex_prompt_client.py @@ -27,7 +27,7 @@ class VertexPromptClient: context = ( - 'You are "chat-CRE" a chatbot for security information that exists in opencre.org. ' + 'You are "OpenCRE Chat" a chatbot for security information that exists in opencre.org. ' "You will be given text and code related to security topics and you will be questioned on these topics, " "please answer the questions based on the content provided with code examples. " "Delimit any code snippet with three backticks." 
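The Gap Analysis page above is backed by two REST endpoints added to `application/web/web_main.py` further down in this diff: `/rest/v1/standards`, which lists the standard names stored in Neo4j, and `/rest/v1/gap_analysis`, which takes the base and compare standards as repeated `standard` query parameters. A rough sketch of exercising them outside the UI — the base URL and the standard names ("ASVS", "Top10") are placeholders, not something this diff guarantees:

```python
import requests

BASE = "http://localhost:5000/rest/v1"  # assumes a local dev instance

# Names of every Node with ntype "Standard" in the Neo4j graph.
standards = requests.get(f"{BASE}/standards").json()
print(standards)

# Repeated "standard" params select the base and compare standards.
resp = requests.get(
    f"{BASE}/gap_analysis",
    params=[("standard", "ASVS"), ("standard", "Top10")],
)
# Result shape (per Node_collection.gap_analysis): {base_id: {"start": {...},
# "paths": {end_id: {"end": {...}, "path": [...], "score": int}}}}
for base_id, entry in resp.json().items():
    for end_id, path in entry["paths"].items():
        print(base_id, "->", end_id, "score:", path["score"])
```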
diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 84de848ec..173bfb274 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1,6 +1,7 @@ import os import tempfile import unittest +from unittest.mock import patch import uuid from copy import copy, deepcopy from pprint import pprint @@ -761,153 +762,6 @@ def test_get_nodes_with_pagination(self) -> None: (None, None, None), ) - def test_gap_analysis(self) -> None: - """Given - the following standards SA1, SA2, SA3 SAA1 , SB1, SD1, SDD1, SW1, SX1 - the following CREs CA, CB, CC, CD, CDD , CW, CX - the following links - CC -> CA, CB,CD - CD -> CDD - CA-> SA1, SAA1 - CB -> SB1 - CD -> SD1 - CDD -> SDD1 - CW -> SW1 - CX -> SA3, SX1 - NoCRE -> SA2 - - Then: - gap_analysis(SA) returns SA1, SA2, SA3 - gap_analysis(SA,SAA) returns SA1 <-> SAA1, SA2, SA3 - gap_analysis(SA,SDD) returns SA1 <-> SDD1, SA2, SA3 - gap_analysis(SA, SW) returns SA1,SA2,SA3, SW1 # no connection - gap_analysis(SA, SB, SD, SW) returns SA1 <->(SB1,SD1), SA2 , SW1, SA3 - gap_analysis(SA, SX) returns SA1, SA2, SA3->SX1 - - give me a single standard - give me two standards connected by same cre - give me two standards connected by cres who are children of the same cre - give me two standards connected by completely different cres - give me two standards with sections on different trees. - - give me two standards without connections - give me 3 or more standards - - """ - - collection = db.Node_collection() - collection.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) - - cres = { - "dbca": collection.add_cre(defs.CRE(id="1", description="CA", name="CA")), - "dbcb": collection.add_cre(defs.CRE(id="2", description="CB", name="CB")), - "dbcc": collection.add_cre(defs.CRE(id="3", description="CC", name="CC")), - "dbcd": collection.add_cre(defs.CRE(id="4", description="CD", name="CD")), - "dbcdd": collection.add_cre( - defs.CRE(id="5", description="CDD", name="CDD") - ), - "dbcw": collection.add_cre(defs.CRE(id="6", description="CW", name="CW")), - "dbcx": collection.add_cre(defs.CRE(id="7", description="CX", name="CX")), - } - def_standards = { - "sa1": defs.Standard(name="SA", section="SA1"), - "sa2": defs.Standard(name="SA", section="SA2"), - "sa3": defs.Standard(name="SA", section="SA3"), - "saa1": defs.Standard(name="SAA", section="SAA1"), - "sb1": defs.Standard(name="SB", section="SB1"), - "sd1": defs.Standard(name="SD", section="SD1"), - "sdd1": defs.Standard(name="SDD", section="SDD1"), - "sw1": defs.Standard(name="SW", section="SW1"), - "sx1": defs.Standard(name="SX", section="SX1"), - } - standards = {} - for k, s in def_standards.items(): - standards["db" + k] = collection.add_node(s) - ltype = defs.LinkTypes.LinkedTo - collection.add_link(cre=cres["dbca"], node=standards["dbsa1"]) - collection.add_link(cre=cres["dbca"], node=standards["dbsaa1"]) - collection.add_link(cre=cres["dbcb"], node=standards["dbsb1"]) - collection.add_link(cre=cres["dbcd"], node=standards["dbsd1"]) - collection.add_link(cre=cres["dbcdd"], node=standards["dbsdd1"]) - collection.add_link(cre=cres["dbcw"], node=standards["dbsw1"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsa3"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsx1"]) - - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbca"]) - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcb"]) - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcd"]) - collection.add_internal_link(group=cres["dbcd"], cre=cres["dbcdd"]) - - 
expected = { - "SA": [def_standards["sa1"], def_standards["sa2"], def_standards["sa3"]], - "SA,SAA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SAA,SA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SDD": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sdd1"]) - ), - copy(def_standards["sdd1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SW": [ - def_standards["sa1"], - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SB,SD,SW": [ - copy(def_standards["sa1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - copy(def_standards["sb1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - copy(def_standards["sd1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])), - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SX": [ - def_standards["sa1"], - def_standards["sa2"], - copy(def_standards["sa3"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sx1"]) - ), - copy(def_standards["sx1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa3"]) - ), - ], - } - - self.maxDiff = None - for args, expected_vals in expected.items(): - stands = args.split(",") - res = collection.gap_analysis(stands) - self.assertCountEqual(res, expected_vals) - def test_add_internal_link(self) -> None: """test that internal links are added successfully, edge cases: @@ -1283,6 +1137,173 @@ def test_get_root_cres(self): self.maxDiff = None self.assertEqual(root_cres, [cres[0], cres[1], cres[7]]) + def test_gap_analysis_disconnected(self): + collection = db.Node_collection() + collection.neo_db.connected = False + self.assertEqual(collection.gap_analysis(["a", "b"]), None) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_no_nodes(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([], []) + self.assertEqual(collection.gap_analysis(["a", "b"]), {}) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_no_links(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([{"id": 1}], []) + self.assertEqual( + collection.gap_analysis(["a", "b"]), {1: {"start": {"id": 1}, "paths": {}}} + ) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_one_link(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": {"id": "a"}, + }, + ] + gap_mock.return_value = ( + [{"id": 1}], + [{"start": {"id": 1}, "end": {"id": 2}, "path": path}], + ) + expected = { + 1: { + 
"start": {"id": 1}, + "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + } + } + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": {"id": "a"}, + }, + ] + path2 = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "RELATED", + "start": {"id": "a"}, + }, + ] + gap_mock.return_value = ( + [{"id": 1}], + [ + {"start": {"id": 1}, "end": {"id": 2}, "path": path}, + {"start": {"id": 1}, "end": {"id": 2}, "path": path2}, + ], + ) + expected = { + 1: { + "start": {"id": 1}, + "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + } + } + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": {"id": "a"}, + }, + ] + path2 = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "RELATED", + "start": {"id": "a"}, + }, + ] + gap_mock.return_value = ( + [{"id": 1}], + [ + {"start": {"id": 1}, "end": {"id": 2}, "path": path2}, + {"start": {"id": 1}, "end": {"id": 2}, "path": path}, + ], + ) + expected = { + 1: { + "start": {"id": 1}, + "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + } + } + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + if __name__ == "__main__": unittest.main() diff --git a/application/tests/gap_analysis_test.py b/application/tests/gap_analysis_test.py new file mode 100644 index 000000000..396da8ee4 --- /dev/null +++ b/application/tests/gap_analysis_test.py @@ -0,0 +1,246 @@ +import unittest + +from application.utils.gap_analysis import ( + get_path_score, + get_relation_direction, + get_next_id, + PENALTIES, +) + + +class TestGapAnalysis(unittest.TestCase): + def tearDown(self) -> None: + return None + + def setUp(self) -> None: + return None + + def test_get_relation_direction_UP(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_relation_direction(step, "123"), "UP") + + def test_get_relation_direction_DOWN(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_relation_direction(step, "234"), "DOWN") + + def test_get_next_id_start(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_next_id(step, "234"), "123") + + def test_get_next_id_end(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_next_id(step, "123"), "234") + + def test_get_path_score_direct_siblings_returns_zero(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": 
"07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "e2ac59b2-c1d8-4525-a6b3-155d480aecc9", + }, + }, + ], + } + self.assertEqual(get_path_score(path), 0) + + def test_get_path_score_one_up_returns_one_up_penaltiy(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "123", + }, + "relationship": "CONTAINS", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_UP"]) + + def test_get_path_score_one_down_one_returns_one_down_penaltiy(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "CONTAINS", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_DOWN"]) + + def test_get_path_score_related_returns_related_penalty(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "RELATED", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["RELATED"]) + + def test_get_path_score_one_of_each_returns_penalty(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "CONTAINS", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "456", + }, + "relationship": "RELATED", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "CONTAINS", + "start": { + "id": "456", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "456", + }, + }, + ], + } + self.assertEqual( + get_path_score(path), + PENALTIES["RELATED"] + + PENALTIES["CONTAINS_UP"] + + PENALTIES["CONTAINS_DOWN"], + ) diff --git 
a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py new file mode 100644 index 000000000..47f97e830 --- /dev/null +++ b/application/utils/gap_analysis.py @@ -0,0 +1,26 @@ +PENALTIES = {"RELATED": 20, "CONTAINS_UP": 2, "CONTAINS_DOWN": 1, "LINKED_TO": 0} + + +def get_path_score(path): + score = 0 + previous_id = path["start"]["id"] + for step in path["path"]: + penalty_type = step["relationship"] + + if step["relationship"] == "CONTAINS": + penalty_type = f"CONTAINS_{get_relation_direction(step, previous_id)}" + score += PENALTIES[penalty_type] + previous_id = get_next_id(step, previous_id) + return score + + +def get_relation_direction(step, previous_id): + if step["start"]["id"] == previous_id: + return "UP" + return "DOWN" + + +def get_next_id(step, previous_id): + if step["start"]["id"] == previous_id: + return step["end"]["id"] + return step["start"]["id"] diff --git a/application/web/web_main.py b/application/web/web_main.py index 50955eed9..c6fd97907 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -65,6 +65,18 @@ def extend_cre_with_tag_links( return cre +def neo4j_not_running_rejection(): + logger.info("Neo4j is disabled") + return ( + jsonify( + { + "message": "Backend services connected to this feature are not running at the moment." + } + ), + 500, + ) + + @app.route("/rest/v1/id/", methods=["GET"]) @app.route("/rest/v1/name/", methods=["GET"]) @cache.cached(timeout=50) @@ -205,13 +217,23 @@ def find_document_by_tag() -> Any: @app.route("/rest/v1/gap_analysis", methods=["GET"]) @cache.cached(timeout=50) -def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet +def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - documents = database.gap_analysis(standards=standards) - if documents: - res = [doc.todict() for doc in documents] - return jsonify(res) + gap_analysis = database.gap_analysis(standards) + if gap_analysis is None: + return neo4j_not_running_rejection() + return jsonify(gap_analysis) + + +@app.route("/rest/v1/standards", methods=["GET"]) +@cache.cached(timeout=50) +def standards() -> Any: + database = db.Node_collection() + standards = database.standards() + if standards is None: + neo4j_not_running_rejection() + return standards @app.route("/rest/v1/text_search", methods=["GET"]) diff --git a/requirements.txt b/requirements.txt index 7eb93e3db..025dbc184 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,5 +29,81 @@ semver setuptools==66.1.1 simplify_docx==0.1.2 SQLAlchemy==2.0.20 +compliance-trestle +nose==1.3.7 +numpy==1.23.0 +neo4j==5.11.0 +openapi-schema-validator==0.3.4 +openapi-spec-validator==0.5.1 +openpyxl==3.1.0 +orderedmultidict==1.0.1 +orjson==3.8.5 +packaging +paramiko==3.0.0 +pathable==0.4.3 +pathspec==0.9.0 +pbr==5.8.0 +pep517==0.8.2 +Pillow==9.1.1 +pip-autoremove==0.9.1 +platformdirs==2.2.0 +playwright==1.33.0 +pluggy==1.0.0 +prance +prompt-toolkit==3.0.19 +proto-plus==1.22.2 +protobuf==4.23.1 +psycopg2==2.9.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.7.0 +pycparser==2.21 +pydantic==1.10.4 +pyee==9.0.4 +pyflakes==2.3.1 +PyGithub==1.53 +PyJWT==1.7.1 +PyNaCl==1.5.0 +pyparsing==2.4.6 +pyrsistent==0.17.3 +PySnooper==1.1.1 +pytest==7.3.1 +pytest-base-url==2.0.0 +pytest-playwright==0.3.3 +python-dateutil==2.8.1 +python-docx==0.8.11 +python-dotenv==0.21.1 +python-frontmatter==1.0.0 +python-markdown-maker==1.0 +python-slugify==8.0.1 +PyYAML==5.3.1 +regex==2021.11.10 +requests==2.27.1 +requests-oauthlib==1.3.1 
+rfc3986==1.5.0 +rsa==4.7 +ruamel.yaml==0.17.21 +ruamel.yaml.clib==0.2.7 +scikit-learn==1.2.2 +Shapely==1.8.5.post1 +simplify-docx==0.1.2 +six==1.15.0 +smmap==3.0.4 +sniffio==1.3.0 +soupsieve==2.4.1 +SQLAlchemy==1.3.23 +sqlalchemy-stubs==0.4 +testresources==2.0.1 +text-unidecode==1.3 +threadpoolctl==3.1.0 +toml==0.10.2 +tomli==1.2.2 +tqdm==4.65.0 +typed-ast==1.5.4 +types-PyYAML==5.4.8 +typing-inspect==0.7.1 +typing_extensions==4.4.0 +untangle==1.1.1 +urllib3==1.26.8 vertexai==0.0.1 xmltodict==0.13.0
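As a worked example of the scoring in `application/utils/gap_analysis.py` (the node IDs below are made up): each hop in a path adds the penalty for its relationship type, with `CONTAINS` split by traversal direction, and the new UI buckets the total as Strong (below 5), Average, or Weak (above 20).

```python
from application.utils.gap_analysis import PENALTIES, get_path_score

# PENALTIES == {"RELATED": 20, "CONTAINS_UP": 2, "CONTAINS_DOWN": 1, "LINKED_TO": 0}

# Hypothetical path from standard S1 to standard S2 through CREs A and B.
path = {
    "start": {"id": "S1"},
    "end": {"id": "S2"},
    "path": [
        # S1 is linked to CRE A (LINKED_TO edges are created from the CRE to the node): +0
        {"start": {"id": "A"}, "end": {"id": "S1"}, "relationship": "LINKED_TO"},
        # The CONTAINS edge starts at A, the node we arrived from, so it is
        # scored as CONTAINS_UP: +2
        {"start": {"id": "A"}, "end": {"id": "B"}, "relationship": "CONTAINS"},
        # CRE B is linked to S2: +0
        {"start": {"id": "B"}, "end": {"id": "S2"}, "relationship": "LINKED_TO"},
    ],
}

assert get_path_score(path) == 2  # rendered as "Strong" (< 5) by the new UI
# Swapping the middle hop for a RELATED edge would score 20 instead of 2.
```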