diff --git a/src/lib/parser-json-sarif.cc b/src/lib/parser-json-sarif.cc index 0aa88a0..33676ae 100644 --- a/src/lib/parser-json-sarif.cc +++ b/src/lib/parser-json-sarif.cc @@ -26,8 +26,11 @@ struct SarifTreeDecoder::Private { void updateCweMap(const pt::ptree *driverNode); + void readToolInfo(TScanProps *pScanProps, const pt::ptree *toolNode); std::string singleChecker = "UNKNOWN_SARIF_WARNING"; + std::string pwd; + const RE reFileUrl = RE("^file://"); const RE reCwe = RE("^CWE-([0-9]+)$"); const RE reVersion = RE("^([0-9][0-9.]+).*$"); const RE reRuleId = @@ -80,35 +83,15 @@ void SarifTreeDecoder::Private::updateCweMap(const pt::ptree *driverNode) } } -void SarifTreeDecoder::readScanProps( - TScanProps *pDst, - const pt::ptree *root) +void SarifTreeDecoder::Private::readToolInfo( + TScanProps *pScanProps, + const pt::ptree *toolNode) { - // read external properties if available - const pt::ptree *iep; - if (findChildOf(&iep, *root, "inlineExternalProperties") - && (1U == iep->size())) - { - const pt::ptree *props; - if (findChildOf(&props, iep->begin()->second, "externalizedProperties")) - for (const pt::ptree::value_type &item : *props) - (*pDst)[item.first] = item.second.data(); - } - - // check that we have exactly one run - const pt::ptree *runs; - if (!findChildOf(&runs, *root, "runs") || (1U != runs->size())) - return; - - // check which tool was used for the run - const pt::ptree *toolNode; - if (!findChildOf(&toolNode, runs->begin()->second, "tool")) - return; const pt::ptree *driverNode; if (!findChildOf(&driverNode, *toolNode, "driver")) return; - d->updateCweMap(driverNode); + this->updateCweMap(driverNode); const auto name = valueOf(*driverNode, "name"); auto version = valueOf(*driverNode, "version"); @@ -117,33 +100,79 @@ void SarifTreeDecoder::readScanProps( if (name == "SnykCode") { // Snyk Code detected! 
- d->singleChecker = "SNYK_CODE_WARNING"; + this->singleChecker = "SNYK_CODE_WARNING"; if (!version.empty()) // record tool version of Snyk Code - (*pDst)["analyzer-version-snyk-code"] = std::move(version); + (*pScanProps)["analyzer-version-snyk-code"] = std::move(version); } else if (name == "gitleaks") { // gitleaks - d->singleChecker = "GITLEAKS_WARNING"; + this->singleChecker = "GITLEAKS_WARNING"; if (!version.empty()) - (*pDst)["analyzer-version-gitleaks"] = std::move(version); + (*pScanProps)["analyzer-version-gitleaks"] = std::move(version); } else if (name == "Semgrep OSS") { // semgrep - d->singleChecker = "SEMGREP_WARNING"; + this->singleChecker = "SEMGREP_WARNING"; if (!version.empty()) - (*pDst)["analyzer-version-semgrep"] = std::move(version); + (*pScanProps)["analyzer-version-semgrep"] = std::move(version); } else if (boost::starts_with(name, "GNU C")) { // GCC - d->singleChecker = "COMPILER_WARNING"; + this->singleChecker = "COMPILER_WARNING"; boost::smatch sm; - if (boost::regex_match(version, sm, d->reVersion)) - (*pDst)["analyzer-version-gcc"] = sm[/* version */ 1]; + if (boost::regex_match(version, sm, this->reVersion)) + (*pScanProps)["analyzer-version-gcc"] = sm[/* version */ 1]; + } +} + +void SarifTreeDecoder::readScanProps( + TScanProps *pDst, + const pt::ptree *root) +{ + // read external properties if available + const pt::ptree *iep; + if (findChildOf(&iep, *root, "inlineExternalProperties") + && (1U == iep->size())) + { + const pt::ptree *props; + if (findChildOf(&props, iep->begin()->second, "externalizedProperties")) + for (const pt::ptree::value_type &item : *props) + (*pDst)[item.first] = item.second.data(); + } + + // check that we have exactly one run + const pt::ptree *runs; + if (!findChildOf(&runs, *root, "runs") + || /* TODO: warn about unsupported format */ (1U != runs->size())) + return; + + // jump to the only run + const pt::ptree &run0 = runs->begin()->second; + + // check which tool was used for the run + const pt::ptree 
*toolNode; + if (findChildOf(&toolNode, run0, "tool")) + d->readToolInfo(pDst, toolNode); + + // read PWD so that we can reconstruct absolute paths later on + const pt::ptree *uriBase, *pwdNode, *uriNode; + if (findChildOf(&uriBase, run0, "originalUriBaseIds") + && findChildOf(&pwdNode, *uriBase, "PWD") + && findChildOf(&uriNode, *pwdNode, "uri")) + { + // remove the "file://" prefix + const auto &pwd = uriNode->data(); + d->pwd = boost::regex_replace(pwd, d->reFileUrl, ""); + // FIXME: Should we check whether d->pwd begins with '/'? + + // make sure that d->pwd ends with '/' + if (!d->pwd.empty() && *d->pwd.rbegin() != '/') + d->pwd += '/'; } } @@ -310,6 +339,32 @@ static int sarifCweFromDefNode(const pt::ptree &defNode) return 0; } +static void expandRelativePaths(Defect *pDef, const std::string &pwd) +{ + if (pwd.empty()) + // no PWD info provided + return; + + // go through all events + for (DefEvent &evt : pDef->events) { + std::string &fileName = evt.fileName; + if (fileName.empty()) + // no file path to expand + continue; + + const unsigned char beginsWith = *fileName.begin(); + switch (beginsWith) { + case '/': // absolute path + case '<': // and the like + continue; + + default: + // prepend `pwd` to relative path + fileName = pwd + fileName; + } + } +} + bool SarifTreeDecoder::readNode(Defect *def) { // move the iterator after we get the current position @@ -377,6 +432,7 @@ bool SarifTreeDecoder::readNode(Defect *def) if (findChildOf(&relatedLocs, defNode, "relatedLocations")) sarifReadComments(def, *relatedLocs); + expandRelativePaths(def, d->pwd); d->digger.inferLangFromChecker(def); d->digger.inferToolFromChecker(def); diff --git a/tests/csgrep/0129-sarif-gcc-pwd-args.txt b/tests/csgrep/0129-sarif-gcc-pwd-args.txt new file mode 100644 index 0000000..7df3c95 --- /dev/null +++ b/tests/csgrep/0129-sarif-gcc-pwd-args.txt @@ -0,0 +1 @@ +--mode=json diff --git a/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt b/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt new file 
mode 100644 index 0000000..bcb88f1 --- /dev/null +++ b/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt @@ -0,0 +1,209 @@ +{ + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "tool": { + "driver": { + "name": "GNU C17", + "fullName": "GNU C17 (GCC) version 14.2.1 20240912 (Red Hat 14.2.1-4) (x86_64-redhat-linux)", + "version": "14.2.1 20240912 (Red Hat 14.2.1-4)", + "informationUri": "https://gcc.gnu.org/gcc-14/", + "rules": [ + { + "id": "-Wanalyzer-null-dereference", + "helpUri": "https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Static-Analyzer-Options.html#index-Wanalyzer-null-dereference" + } + ] + } + }, + "taxonomies": [ + { + "name": "CWE", + "version": "4.7", + "organization": "MITRE", + "shortDescription": { + "text": "The MITRE Common Weakness Enumeration" + }, + "taxa": [ + { + "id": "476", + "helpUri": "https://cwe.mitre.org/data/definitions/476.html" + } + ] + } + ], + "invocations": [ + { + "executionSuccessful": true, + "toolExecutionNotifications": [] + } + ], + "originalUriBaseIds": { + "PWD": { + "uri": "file:///home/kdudka/" + } + }, + "artifacts": [ + { + "location": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "contents": { + "text": "int main()\n{\n int *p = 0;\n return *p;\n}\n" + }, + "sourceLanguage": "c" + } + ], + "results": [ + { + "ruleId": "-Wanalyzer-null-dereference", + "taxa": [ + { + "id": "476", + "toolComponent": { + "name": "cwe" + } + } + ], + "properties": { + "gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/enode": 4, + "gcc/analyzer/saved_diagnostic/snode": 1, + "gcc/analyzer/saved_diagnostic/sval": "(int *)0B", + "gcc/analyzer/saved_diagnostic/state": "null", + "gcc/analyzer/saved_diagnostic/idx": 0 + }, + "level": "warning", + "message": { + "text": "dereference of NULL \u2018p\u2019" + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, 
+ "region": { + "startLine": 4, + "startColumn": 12, + "endColumn": 14 + }, + "contextRegion": { + "startLine": 4, + "snippet": { + "text": " return *p;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ] + } + ], + "codeFlows": [ + { + "threadFlows": [ + { + "id": "main", + "locations": [ + { + "properties": { + "gcc/analyzer/checker_event/emission_id": "(1)", + "gcc/analyzer/checker_event/kind": "EK_STATE_CHANGE" + }, + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "region": { + "startLine": 3, + "startColumn": 10, + "endColumn": 11 + }, + "contextRegion": { + "startLine": 3, + "snippet": { + "text": " int *p = 0;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ], + "message": { + "text": "\u2018p\u2019 is NULL" + } + }, + "kinds": [ + "release", + "memory" + ], + "nestingLevel": 1, + "executionOrder": 1 + }, + { + "properties": { + "gcc/analyzer/checker_event/emission_id": "(2)", + "gcc/analyzer/checker_event/kind": "EK_WARNING" + }, + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "region": { + "startLine": 4, + "startColumn": 12, + "endColumn": 14 + }, + "contextRegion": { + "startLine": 4, + "snippet": { + "text": " return *p;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ], + "message": { + "text": "dereference of NULL \u2018p\u2019" + } + }, + "kinds": [ + "danger" + ], + "nestingLevel": 1, + "executionOrder": 2 + } + ] + } + ] + } + ] + } + ] + } + ] +} diff --git a/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt b/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt new file mode 100644 index 0000000..d42c906 --- /dev/null +++ 
b/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt @@ -0,0 +1,43 @@ +{ + "scan": { + "analyzer-version-gcc": "14.2.1" + }, + "defects": [ + { + "checker": "GCC_ANALYZER_WARNING", + "cwe": 476, + "language": "c/c++", + "tool": "gcc-analyzer", + "key_event_idx": 0, + "events": [ + { + "file_name": "/home/kdudka/xxx.c", + "line": 4, + "column": 12, + "h_size": 2, + "event": "warning[-Wanalyzer-null-dereference]", + "message": "dereference of NULL ‘p’", + "verbosity_level": 0 + }, + { + "file_name": "/home/kdudka/xxx.c", + "line": 3, + "column": 10, + "h_size": 1, + "event": "release_memory", + "message": "‘p’ is NULL", + "verbosity_level": 1 + }, + { + "file_name": "/home/kdudka/xxx.c", + "line": 4, + "column": 12, + "h_size": 2, + "event": "danger", + "message": "dereference of NULL ‘p’", + "verbosity_level": 1 + } + ] + } + ] +} diff --git a/tests/csgrep/CMakeLists.txt b/tests/csgrep/CMakeLists.txt index 233d574..f064444 100644 --- a/tests/csgrep/CMakeLists.txt +++ b/tests/csgrep/CMakeLists.txt @@ -172,4 +172,5 @@ test_csgrep("0125-sarif-parser-bom" ) test_csgrep("0126-cov-parser-imp-flag" ) test_csgrep("0127-cov-writer-noloc" ) test_csgrep("0128-cov-parser-noloc" ) +test_csgrep("0129-sarif-gcc-pwd" ) test_csgrep("0131-unicontrol-perl-man-page" )