From f63b6dc452c2221f9788a0ceca37710c38c2c536 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 3 Jul 2023 08:49:01 +0200 Subject: [PATCH 01/26] LLVMTaintConfigYAML class --- .../TaintConfig/LLVMTaintConfigYAML.h | 90 +++++++++++++++++++ .../TaintConfig/LLVMTaintConfigYAML.cpp | 37 ++++++++ 2 files changed, 127 insertions(+) create mode 100644 include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h create mode 100644 lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h new file mode 100644 index 000000000..1cd0e4ef7 --- /dev/null +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h @@ -0,0 +1,90 @@ +/****************************************************************************** + * Copyright (c) 2021 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Maximilian Leo Huber and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H +#define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" + +#include +#include + +namespace psr { +class LLVMTaintConfigYAML; +class LLVMProjectIRDB; + +template <> struct TaintConfigTraits { + using n_t = const llvm::Instruction *; + using v_t = const llvm::Value *; + using f_t = const llvm::Function *; +}; + +class LLVMTaintConfigYAML : public TaintConfigBase { + friend TaintConfigBase; + +public: + explicit LLVMTaintConfigYAML(const psr::LLVMProjectIRDB &Code, + const llvm::Twine &Path); + explicit LLVMTaintConfigYAML(const psr::LLVMProjectIRDB &AnnotatedCode); + explicit LLVMTaintConfigYAML( + TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, + TaintDescriptionCallBackTy SanitizerCB = {}) noexcept; + void loadYAML(const llvm::Twine &Path); + void addSourceValue(const llvm::Value *V); + void addSinkValue(const llvm::Value *V); + void addSanitizerValue(const llvm::Value *V); + void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); + void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); + +private: + [[nodiscard]] bool isSourceImpl(const llvm::Value *V) const; + [[nodiscard]] bool isSinkImpl(const llvm::Value *V) const; + [[nodiscard]] bool isSanitizerImpl(const llvm::Value *V) const; + + void forAllGeneratedValuesAtImpl( + const llvm::Instruction *Inst, const llvm::Function *Callee, + llvm::function_ref Handler) const; + + void forAllLeakCandidatesAtImpl( + const llvm::Instruction *Inst, const llvm::Function *Callee, + llvm::function_ref Handler) const; + + void forAllSanitizedValuesAtImpl( + const llvm::Instruction *Inst, const llvm::Function *Callee, + llvm::function_ref Handler) const; + + [[nodiscard]] bool generatesValuesAtImpl(const llvm::Instruction *Inst, + const llvm::Function *Callee) const; + [[nodiscard]] bool mayLeakValuesAtImpl(const llvm::Instruction *Inst, + const llvm::Function *Callee) const; + [[nodiscard]] bool sanitizesValuesAtImpl(const llvm::Instruction *Inst, + const llvm::Function *Callee) const; + + [[nodiscard]] TaintCategory getCategoryImpl(const llvm::Value *V) const; + + [[nodiscard]] std::map> + makeInitialSeedsImpl() const; + + void printImpl(llvm::raw_ostream &OS) const; + + // --- utilities + + void addAllFunctions(const LLVMProjectIRDB &IRDB); + // --- data members + std::unordered_set SourceValues; + std::unordered_set SinkValues; + std::unordered_set SanitizerValues; +}; + +extern template class TaintConfigBase; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp new file mode 100644 index 000000000..7bdb02267 --- /dev/null +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp @@ -0,0 +1,37 @@ +/****************************************************************************** + * Copyright (c) 2021 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Maximilian Leo Huber and others + *****************************************************************************/ +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h" + +#include "llvm/Support/ErrorHandling.h" + +#include + +namespace psr { + +void LLVMTaintConfigYAML::loadYAML(const llvm::Twine &Path) { + std::ifstream File; + File.open(Path.str().c_str()); + + if (File.fail()) { + llvm::report_fatal_error("File could not be opened: " + Path); + return; + } + + // if file exists and is openable, loop over file and extract all neccesary + // information + std::string Line; + unsigned int LineCounter = 0; + while (std::getline(File, Line)) { + LineCounter++; + } + + File.close(); +} + +} // namespace psr From 77d35cd173401e1bad67ebc62995655eaa80d917 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 3 Jul 2023 19:01:18 +0200 Subject: [PATCH 02/26] basic structure --- .../TaintConfig/LLVMTaintConfigYAML.h | 90 ------------------- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 58 ++++++++++++ .../TaintConfig/LLVMTaintConfigYAML.cpp | 37 -------- .../TaintConfig/TaintConfigData.cpp | 38 ++++++++ 4 files changed, 96 insertions(+), 127 deletions(-) delete mode 100644 include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h create mode 100644 include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h delete mode 100644 lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp create mode 100644 lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h deleted file mode 100644 index 1cd0e4ef7..000000000 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h +++ /dev/null @@ -1,90 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2021 Philipp Schubert. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Maximilian Leo Huber and others - *****************************************************************************/ - -#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H -#define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" - -#include -#include - -namespace psr { -class LLVMTaintConfigYAML; -class LLVMProjectIRDB; - -template <> struct TaintConfigTraits { - using n_t = const llvm::Instruction *; - using v_t = const llvm::Value *; - using f_t = const llvm::Function *; -}; - -class LLVMTaintConfigYAML : public TaintConfigBase { - friend TaintConfigBase; - -public: - explicit LLVMTaintConfigYAML(const psr::LLVMProjectIRDB &Code, - const llvm::Twine &Path); - explicit LLVMTaintConfigYAML(const psr::LLVMProjectIRDB &AnnotatedCode); - explicit LLVMTaintConfigYAML( - TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, - TaintDescriptionCallBackTy SanitizerCB = {}) noexcept; - void loadYAML(const llvm::Twine &Path); - void addSourceValue(const llvm::Value *V); - void addSinkValue(const llvm::Value *V); - void addSanitizerValue(const llvm::Value *V); - void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); - void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); - -private: - [[nodiscard]] bool isSourceImpl(const llvm::Value *V) const; - [[nodiscard]] bool isSinkImpl(const llvm::Value *V) const; - [[nodiscard]] bool isSanitizerImpl(const llvm::Value *V) const; - - void forAllGeneratedValuesAtImpl( - const llvm::Instruction *Inst, const llvm::Function *Callee, - llvm::function_ref Handler) const; - - void forAllLeakCandidatesAtImpl( - const llvm::Instruction *Inst, const llvm::Function *Callee, - llvm::function_ref Handler) const; - - void forAllSanitizedValuesAtImpl( - const llvm::Instruction *Inst, const llvm::Function *Callee, - llvm::function_ref Handler) const; - - [[nodiscard]] bool generatesValuesAtImpl(const llvm::Instruction *Inst, - const llvm::Function *Callee) const; - [[nodiscard]] bool mayLeakValuesAtImpl(const llvm::Instruction *Inst, - const llvm::Function *Callee) const; - [[nodiscard]] bool sanitizesValuesAtImpl(const llvm::Instruction *Inst, - const llvm::Function *Callee) const; - - [[nodiscard]] TaintCategory getCategoryImpl(const llvm::Value *V) const; - - [[nodiscard]] std::map> - makeInitialSeedsImpl() const; - - void printImpl(llvm::raw_ostream &OS) const; - - // --- utilities - - void addAllFunctions(const LLVMProjectIRDB &IRDB); - // --- data members - std::unordered_set SourceValues; - std::unordered_set SinkValues; - std::unordered_set SanitizerValues; -}; - -extern template class TaintConfigBase; - -} // namespace psr - -#endif // PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIGYAML_H diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h new file mode 100644 index 000000000..d12823bd2 --- /dev/null +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -0,0 +1,58 @@ +/****************************************************************************** + * Copyright (c) 2023 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Maximilian Leo Huber and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H +#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H + +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" + +#include + +namespace psr { +class TaintConfigData; + +template <> struct TaintConfigTraits { + using n_t = const llvm::Instruction *; + using v_t = const llvm::Value *; + using f_t = const llvm::Function *; +}; + +class TaintConfigData : public TaintConfigBase { + friend TaintConfigBase; + +public: + TaintConfigData(const llvm::Twine &Path); + + void loadDataFromFile(); + void addDataToFile(); + + inline void addSourceValue(v_t Value) { SourceValues.insert(Value); } + inline void addSinkValue(v_t Value) { SinkValues.insert(Value); } + inline void addSanitizerValue(v_t Value) { SanitizerValues.insert(Value); } + + inline std::unordered_set getInstructions() { return SourceValues; } + inline std::unordered_set getVariables() { return SinkValues; } + inline std::unordered_set getFunctions() { return SanitizerValues; } + +private: + llvm::Twine Path; + std::unordered_set SourceValues; + std::unordered_set SinkValues; + std::unordered_set SanitizerValues; +}; + +extern template class TaintConfigBase; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H \ No newline at end of file diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp deleted file mode 100644 index 7bdb02267..000000000 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2021 Philipp Schubert. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Maximilian Leo Huber and others - *****************************************************************************/ -#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfigYAML.h" - -#include "llvm/Support/ErrorHandling.h" - -#include - -namespace psr { - -void LLVMTaintConfigYAML::loadYAML(const llvm::Twine &Path) { - std::ifstream File; - File.open(Path.str().c_str()); - - if (File.fail()) { - llvm::report_fatal_error("File could not be opened: " + Path); - return; - } - - // if file exists and is openable, loop over file and extract all neccesary - // information - std::string Line; - unsigned int LineCounter = 0; - while (std::getline(File, Line)) { - LineCounter++; - } - - File.close(); -} - -} // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp new file mode 100644 index 000000000..5637c1a9f --- /dev/null +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -0,0 +1,38 @@ +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" + +#include "phasar/Utils/NlohmannLogging.h" + +#include + +namespace psr { + +TaintConfigData::TaintConfigData(const llvm::Twine &Path) : Path(Path) {} + +void TaintConfigData::loadDataFromFile() { + // retrieve data from file + nlohmann::json Config = parseTaintConfig(Path); + + // load data from nlohmann::json +} + +void TaintConfigData::addDataToFile() { + nlohmann::json Config; + + // TODO (max): add data to nlohmann::json Config + for (const auto &Source : SourceValues) { + Config.push_back({"SourceValues", {{Source->getName().str()}}}); + } + + for (const auto &Sink : SinkValues) { + Config.push_back({"SinkValues", {{Sink->getName().str()}}}); + } + + for (const auto &Sanitizer : SanitizerValues) { + Config.push_back({"SanitizerValues", {{Sanitizer->getName().str()}}}); + } + + std::ofstream File(Path.str()); + File << Config; +} + +} // namespace psr From e8b12769ba733e1cc7bc36c00402c8683112357f Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 6 Jul 2023 07:20:26 +0200 Subject: [PATCH 03/26] beginning of restructuring --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 5 ++-- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 15 ++++++++-- .../TaintConfig/LLVMTaintConfig.cpp | 20 ++++++------- .../TaintConfig/TaintConfigData.cpp | 28 ++++++++++++++++--- 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index 04c11650f..e8feaf969 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIG_H #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "llvm/IR/Instruction.h" @@ -31,7 +32,7 @@ class LLVMTaintConfig : public TaintConfigBase { public: explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const nlohmann::json &Config); + const TaintConfigData &Config); explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode); explicit LLVMTaintConfig( TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, @@ -93,7 +94,7 @@ class LLVMTaintConfig : public TaintConfigBase { // --- utilities void addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); + const TaintConfigData &Config); // --- data members diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index d12823bd2..ee301188c 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -40,15 +40,24 @@ class TaintConfigData : public TaintConfigBase { inline void addSinkValue(v_t Value) { SinkValues.insert(Value); } inline void addSanitizerValue(v_t Value) { SanitizerValues.insert(Value); } - inline std::unordered_set getInstructions() { return SourceValues; } - inline std::unordered_set getVariables() { return SinkValues; } - inline std::unordered_set getFunctions() { return SanitizerValues; } + inline std::unordered_set getSourceValues() const { + return SourceValues; + } + inline std::unordered_set getSinkValues() const { return SinkValues; } + inline std::unordered_set getSanitizerValues() const { + return SanitizerValues; + } + inline std::unordered_set getFunctions() const { return Functions; } + inline bool hasFunctions() const { return !Functions.empty(); } private: llvm::Twine Path; + std::unordered_set Functions; std::unordered_set SourceValues; std::unordered_set SinkValues; std::unordered_set SanitizerValues; + + void printImpl(llvm::raw_ostream &OS) const; }; extern template class TaintConfigBase; diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 06958f7ed..4a900537b 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -12,10 +12,10 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/Annotation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -59,9 +59,9 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { } void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); + const TaintConfigData &Config) { + for (const auto &FunDesc : Config.getFunctions()) { + auto Name = FunDesc->getName().str(); auto FnDefs = findAllFunctionDefs(IRDB, Name); @@ -73,8 +73,8 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const auto *Fun = FnDefs[0]; // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; + if (!FunDesc->arg_empty()) { + auto Params = FunDesc->arg_begin(); if (Params.contains("source")) { for (unsigned Idx : Params["source"]) { if (Idx >= Fun->arg_size()) { @@ -133,16 +133,16 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, } LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const nlohmann::json &Config) { + const TaintConfigData &Config) { // handle functions - if (Config.contains("functions")) { + if (Config.hasFunctions()) { addAllFunctions(Code, Config); } // handle variables if (Config.contains("variables")) { // scope can be a function name or a struct. - std::unordered_map + std::unordered_map StructConfigMap; // read all struct types from config @@ -156,7 +156,7 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, Ty->getName().equals(VarDesc["scope"].get())) { for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { StructConfigMap.insert( - std::pair( + std::pair( LlvmStructTy, VarDesc)); } } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 5637c1a9f..651644b3d 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -1,5 +1,6 @@ #include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/NlohmannLogging.h" #include @@ -9,16 +10,12 @@ namespace psr { TaintConfigData::TaintConfigData(const llvm::Twine &Path) : Path(Path) {} void TaintConfigData::loadDataFromFile() { - // retrieve data from file nlohmann::json Config = parseTaintConfig(Path); - - // load data from nlohmann::json } void TaintConfigData::addDataToFile() { nlohmann::json Config; - // TODO (max): add data to nlohmann::json Config for (const auto &Source : SourceValues) { Config.push_back({"SourceValues", {{Source->getName().str()}}}); } @@ -35,4 +32,27 @@ void TaintConfigData::addDataToFile() { File << Config; } +void TaintConfigData::printImpl(llvm::raw_ostream &OS) const { + OS << "TaintConfiguration in TaintConfigData: "; + if (SourceValues.empty() && SinkValues.empty() && SanitizerValues.empty() && + !getRegisteredSourceCallBack() && !getRegisteredSinkCallBack()) { + OS << "empty"; + return; + } + OS << "\n\tSourceCallBack registered: " << (bool)SourceCallBack << '\n'; + OS << "\tSinkCallBack registered: " << (bool)SinkCallBack << '\n'; + OS << "\tSources (" << SourceValues.size() << "):\n"; + for (const auto *SourceValue : SourceValues) { + OS << "\t\t" << psr::llvmIRToString(SourceValue) << '\n'; + } + OS << "\tSinks (" << SinkValues.size() << "):\n"; + for (const auto *SinkValue : SinkValues) { + OS << "\t\t" << psr::llvmIRToString(SinkValue) << '\n'; + } + OS << "\tSanitizers (" << SanitizerValues.size() << "):\n"; + for (const auto *SanitizerValue : SanitizerValues) { + OS << "\t\t" << psr::llvmIRToString(SanitizerValue) << '\n'; + } +} + } // namespace psr From 7bf384fe2c0f98bc4326362687e4bc979d2c84ad Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 19 Jul 2023 08:06:35 +0200 Subject: [PATCH 04/26] compiles, untested --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 5 - .../PhasarLLVM/TaintConfig/TaintConfigData.h | 62 ++-- lib/Controller/AnalysisController.cpp | 5 +- .../TaintConfig/LLVMTaintConfig.cpp | 141 +------- .../TaintConfig/TaintConfigData.cpp | 312 ++++++++++++++++-- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 5 +- .../Mono/InterMonoTaintAnalysisTest.cpp | 4 +- .../TaintConfig/TaintConfigTest.cpp | 24 +- 8 files changed, 337 insertions(+), 221 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index e8feaf969..ddbe2bfe2 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -91,11 +91,6 @@ class LLVMTaintConfig : public TaintConfigBase { void printImpl(llvm::raw_ostream &OS) const; - // --- utilities - - void addAllFunctions(const LLVMProjectIRDB &IRDB, - const TaintConfigData &Config); - // --- data members std::unordered_set SourceValues; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index ee301188c..4639d0e82 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -10,58 +10,58 @@ #ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" #include +#include + namespace psr { class TaintConfigData; -template <> struct TaintConfigTraits { - using n_t = const llvm::Instruction *; - using v_t = const llvm::Value *; - using f_t = const llvm::Function *; -}; - -class TaintConfigData : public TaintConfigBase { - friend TaintConfigBase; - +class TaintConfigData { public: - TaintConfigData(const llvm::Twine &Path); + TaintConfigData() = default; + TaintConfigData(const llvm::Twine &Path, const LLVMProjectIRDB &IRDB); + TaintConfigData(const nlohmann::json &JSON, const LLVMProjectIRDB &IRDB); - void loadDataFromFile(); - void addDataToFile(); + static TaintConfigData loadDataFromFile(const llvm::Twine &Path, + const LLVMProjectIRDB &IRDB); + void addDataToFile(const llvm::Twine &Path); - inline void addSourceValue(v_t Value) { SourceValues.insert(Value); } - inline void addSinkValue(v_t Value) { SinkValues.insert(Value); } - inline void addSanitizerValue(v_t Value) { SanitizerValues.insert(Value); } + inline void addSourceValue(std::string Value) { + SourceValues.insert(std::move(Value)); + } + inline void addSinkValue(std::string Value) { + SinkValues.insert(std::move(Value)); + } + inline void addSanitizerValue(std::string Value) { + SanitizerValues.insert(std::move(Value)); + } + + void getValuesFromJSON(nlohmann::json JSON); - inline std::unordered_set getSourceValues() const { + inline const std::unordered_set &getSourceValues() const { return SourceValues; } - inline std::unordered_set getSinkValues() const { return SinkValues; } - inline std::unordered_set getSanitizerValues() const { + inline const std::unordered_set &getSinkValues() const { + return SinkValues; + } + inline const std::unordered_set &getSanitizerValues() const { return SanitizerValues; } - inline std::unordered_set getFunctions() const { return Functions; } - inline bool hasFunctions() const { return !Functions.empty(); } private: - llvm::Twine Path; - std::unordered_set Functions; - std::unordered_set SourceValues; - std::unordered_set SinkValues; - std::unordered_set SanitizerValues; - - void printImpl(llvm::raw_ostream &OS) const; + void loadDataFromFileForThis(const llvm::Twine &Path, + const LLVMProjectIRDB &IRDB); + std::unordered_set SourceValues; + std::unordered_set SinkValues; + std::unordered_set SanitizerValues; }; -extern template class TaintConfigBase; - } // namespace psr #endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H \ No newline at end of file diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index 1341319a0..f6f64e5bc 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -28,6 +28,8 @@ #include #include +#include + namespace psr { AnalysisController::AnalysisController( @@ -216,7 +218,8 @@ LLVMTaintConfig AnalysisController::makeTaintConfig() { !AnalysisConfigs.empty() ? AnalysisConfigs[0] : ""; return !AnalysisConfigPath.empty() ? LLVMTaintConfig(HA.getProjectIRDB(), - parseTaintConfig(AnalysisConfigPath)) + TaintConfigData((llvm::Twine)AnalysisConfigPath, + HA.getProjectIRDB())) : LLVMTaintConfig(HA.getProjectIRDB()); } diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 4a900537b..a0ae4075d 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -58,148 +58,9 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { return FnDefs; } -void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, - const TaintConfigData &Config) { - for (const auto &FunDesc : Config.getFunctions()) { - auto Name = FunDesc->getName().str(); - - auto FnDefs = findAllFunctionDefs(IRDB, Name); - - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } - - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (!FunDesc->arg_empty()) { - auto Params = FunDesc->arg_begin(); - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); - } - } - } - // handle a function's return value - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc["ret"].get()); - } - } - } -} - LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, const TaintConfigData &Config) { - // handle functions - if (Config.hasFunctions()) { - addAllFunctions(Code, Config); - } - - // handle variables - if (Config.contains("variables")) { - // scope can be a function name or a struct. - std::unordered_map - StructConfigMap; - - // read all struct types from config - for (const auto &VarDesc : Config["variables"]) { - llvm::DebugInfoFinder DIF; - const auto *M = Code.getModule(); - - DIF.processModule(*M); - for (const auto &Ty : DIF.types()) { - if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc["scope"].get())) { - for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { - StructConfigMap.insert( - std::pair( - LlvmStructTy, VarDesc)); - } - } - } - DIF.reset(); - } - - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config["variables"]) { - for (const auto &Fun : Code.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = - llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().equals( - VarDesc["name"].get()) && - LocalVar->getLine() == VarDesc["line"].get()) { - addTaintCategory(DbgDeclare->getAddress(), - VarDesc["cat"].get()); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc["name"].get(); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { - addTaintCategory(Gep, VarDesc["cat"].get()); - } - } - } - } - } - } - } - } + // TODO } LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) { diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 651644b3d..9f0c328ea 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -1,57 +1,311 @@ #include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/IO.h" #include "phasar/Utils/NlohmannLogging.h" -#include +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/raw_ostream.h" + +#include namespace psr { -TaintConfigData::TaintConfigData(const llvm::Twine &Path) : Path(Path) {} +TaintConfigData::TaintConfigData(const llvm::Twine &Path, + const LLVMProjectIRDB &IRDB) { + loadDataFromFileForThis(Path, IRDB); +} + +static llvm::SmallVector +findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { + llvm::SmallVector FnDefs; + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &SubProgram : DIF.subprograms()) { + if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && + (SubProgram->getName() == Name || + SubProgram->getLinkageName() == Name)) { + FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); + } + } + DIF.reset(); + + if (FnDefs.empty()) { + const auto *F = IRDB.getFunction(Name); + if (F) { + FnDefs.push_back(F); + } + } else if (FnDefs.size() > 1) { + llvm::errs() << "The function name '" << Name + << "' is ambiguous. Possible candidates are:\n"; + for (const auto *F : FnDefs) { + llvm::errs() << "> " << F->getName() << "\n"; + } + llvm::errs() << "Please further specify the function's name, such that it " + "becomes unambiguous\n"; + } + + return FnDefs; +} + +TaintConfigData::TaintConfigData(const nlohmann::json &JSON, + const LLVMProjectIRDB &IRDB) { + for (const auto &FunDesc : JSON["functions"]) { + auto Name = FunDesc["name"].get(); + + auto FnDefs = findAllFunctionDefs(IRDB, Name); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; + continue; + } -void TaintConfigData::loadDataFromFile() { - nlohmann::json Config = parseTaintConfig(Path); + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + SourceValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + SinkValues.insert(Fun->getArg(Idx)->getName().str()); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + SinkValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + SanitizerValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + } + // handle a function's return value + /* + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + Data.addTaintValue(User, FunDesc["ret"].get()); + } + }*/ + } } -void TaintConfigData::addDataToFile() { +TaintConfigData TaintConfigData::loadDataFromFile(const llvm::Twine &Path, + const LLVMProjectIRDB &IRDB) { + TaintConfigData Data = TaintConfigData(); + nlohmann::json Config = readJsonFile(Path); + for (const auto &FunDesc : Config["functions"]) { + auto Name = FunDesc["name"].get(); + + auto FnDefs = findAllFunctionDefs(IRDB, Name); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; + continue; + } + + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + Data.addSourceValue(Fun->getArg(Idx)->getName().str()); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + Data.addSinkValue(Fun->getArg(Idx)->getName().str()); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + Data.addSinkValue(Fun->getArg(Idx)->getName().str()); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + Data.addSanitizerValue(Fun->getArg(Idx)->getName().str()); + } + } + } + // handle a function's return value + /* + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + Data.addTaintValue(User, FunDesc["ret"].get()); + } + }*/ + } + + return Data; +} + +void TaintConfigData::addDataToFile(const llvm::Twine &Path) { nlohmann::json Config; for (const auto &Source : SourceValues) { - Config.push_back({"SourceValues", {{Source->getName().str()}}}); + Config.push_back({"SourceValues", {{Source}}}); } for (const auto &Sink : SinkValues) { - Config.push_back({"SinkValues", {{Sink->getName().str()}}}); + Config.push_back({"SinkValues", {{Sink}}}); } for (const auto &Sanitizer : SanitizerValues) { - Config.push_back({"SanitizerValues", {{Sanitizer->getName().str()}}}); + Config.push_back({"SanitizerValues", {{Sanitizer}}}); + } + + std::error_code FileError; + llvm::raw_fd_ostream File(Path.str(), FileError); + + if (FileError) { + llvm::errs() << "Error while creating file: " << Path.str() << "\n"; } - std::ofstream File(Path.str()); File << Config; } -void TaintConfigData::printImpl(llvm::raw_ostream &OS) const { - OS << "TaintConfiguration in TaintConfigData: "; - if (SourceValues.empty() && SinkValues.empty() && SanitizerValues.empty() && - !getRegisteredSourceCallBack() && !getRegisteredSinkCallBack()) { - OS << "empty"; - return; - } - OS << "\n\tSourceCallBack registered: " << (bool)SourceCallBack << '\n'; - OS << "\tSinkCallBack registered: " << (bool)SinkCallBack << '\n'; - OS << "\tSources (" << SourceValues.size() << "):\n"; - for (const auto *SourceValue : SourceValues) { - OS << "\t\t" << psr::llvmIRToString(SourceValue) << '\n'; - } - OS << "\tSinks (" << SinkValues.size() << "):\n"; - for (const auto *SinkValue : SinkValues) { - OS << "\t\t" << psr::llvmIRToString(SinkValue) << '\n'; - } - OS << "\tSanitizers (" << SanitizerValues.size() << "):\n"; - for (const auto *SanitizerValue : SanitizerValues) { - OS << "\t\t" << psr::llvmIRToString(SanitizerValue) << '\n'; +void TaintConfigData::getValuesFromJSON(nlohmann::json JSON) { + // TODO: +} + +void TaintConfigData::loadDataFromFileForThis(const llvm::Twine &Path, + const LLVMProjectIRDB &IRDB) { + nlohmann::json Config = readJsonFile(Path); + for (const auto &FunDesc : Config["functions"]) { + auto Name = FunDesc["name"].get(); + + auto FnDefs = findAllFunctionDefs(IRDB, Name); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; + continue; + } + + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + SourceValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + SinkValues.insert(Fun->getArg(Idx)->getName().str()); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + SinkValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + SanitizerValues.insert(Fun->getArg(Idx)->getName().str()); + } + } + } + // handle a function's return value + /* + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + Data.addTaintValue(User, FunDesc["ret"].get()); + } + }*/ } } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 8f942e762..63ef715a8 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -64,8 +64,9 @@ class IDETaintAnalysisTest : public ::testing::Test { return LLVMTaintConfig(HA.getProjectIRDB()); }, [&](json *JS) { - auto Ret = - LLVMTaintConfig(HA.getProjectIRDB(), *JS); + auto Ret = LLVMTaintConfig( + HA.getProjectIRDB(), + TaintConfigData(*JS, HA.getProjectIRDB())); if (DumpResults) { llvm::errs() << Ret << "\n"; } diff --git a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp index 0ab2c5bc7..0335006cb 100644 --- a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp @@ -34,7 +34,9 @@ class InterMonoTaintAnalysisTest : public ::testing::Test { auto ConfigPath = (PathToLlFiles + "config.json").str(); auto BuildPos = ConfigPath.rfind("/build/") + 1; ConfigPath.erase(BuildPos, 6); - LLVMTaintConfig TC(HA.getProjectIRDB(), parseTaintConfig(ConfigPath)); + LLVMTaintConfig TC( + HA.getProjectIRDB(), + TaintConfigData(parseTaintConfig(ConfigPath), HA.getProjectIRDB())); TC.registerSinkCallBack([](const llvm::Instruction *Inst) { std::set Ret; if (const auto *Call = llvm::dyn_cast(Inst); diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 530c74838..8f7c2aaff 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -233,7 +233,7 @@ TEST_F(TaintConfigTest, Array_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -246,7 +246,7 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -258,7 +258,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const auto *Bar = IR.getFunction("bar"); assert(Bar); @@ -282,7 +282,7 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); const llvm::Value *I2 = IR.getInstruction(18); @@ -296,7 +296,7 @@ TEST_F(TaintConfigTest, Basic_03_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const auto *TaintPair = IR.getFunction("taintPair"); assert(TaintPair); @@ -314,7 +314,7 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); @@ -327,7 +327,7 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); @@ -342,7 +342,7 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { @@ -366,7 +366,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); @@ -398,7 +398,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); for (const auto *F : IR.getAllFunctions()) { @@ -420,7 +420,7 @@ TEST_F(TaintConfigTest, StaticFun_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); @@ -442,7 +442,7 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); llvm::outs() << TConfig << '\n'; const llvm::Value *CallInst = IR.getInstruction(13); const auto *I = llvm::dyn_cast(CallInst); From 4b8d77044f51826fc9c566cb1ee7ae46c55f894c Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 19 Jul 2023 19:42:55 +0200 Subject: [PATCH 05/26] Constructor init --- .../TaintConfig/LLVMTaintConfig.cpp | 42 ++++--------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index a0ae4075d..4b43ce6f2 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -24,43 +24,19 @@ namespace psr { -static llvm::SmallVector -findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { - llvm::SmallVector FnDefs; - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); - - DIF.processModule(*M); - for (const auto &SubProgram : DIF.subprograms()) { - if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && - (SubProgram->getName() == Name || - SubProgram->getLinkageName() == Name)) { - FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); - } +LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const TaintConfigData &Config) { + for (const auto &Value : Config.getSourceValues()) { + SourceValues.insert(Code.getFunction(Value)); } - DIF.reset(); - if (FnDefs.empty()) { - const auto *F = IRDB.getFunction(Name); - if (F) { - FnDefs.push_back(F); - } - } else if (FnDefs.size() > 1) { - llvm::errs() << "The function name '" << Name - << "' is ambiguous. Possible candidates are:\n"; - for (const auto *F : FnDefs) { - llvm::errs() << "> " << F->getName() << "\n"; - } - llvm::errs() << "Please further specify the function's name, such that it " - "becomes unambiguous\n"; + for (const auto &Value : Config.getSinkValues()) { + SinkValues.insert(Code.getFunction(Value)); } - return FnDefs; -} - -LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const TaintConfigData &Config) { - // TODO + for (const auto &Value : Config.getSanitizerValues()) { + SanitizerValues.insert(Code.getFunction(Value)); + } } LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) { From b1f3d81201e99c1154edb86ddea63e90569b5d59 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 21 Jul 2023 09:57:29 +0200 Subject: [PATCH 06/26] minor bug fixes --- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 14 +- .../TaintConfig/LLVMTaintConfig.cpp | 2 - .../TaintConfig/TaintConfigData.cpp | 155 ++++++++++++++++-- .../TaintConfig/TaintConfigTest.cpp | 1 + 4 files changed, 152 insertions(+), 20 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 4639d0e82..a0b272169 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" @@ -26,12 +27,17 @@ class TaintConfigData { public: TaintConfigData() = default; TaintConfigData(const llvm::Twine &Path, const LLVMProjectIRDB &IRDB); - TaintConfigData(const nlohmann::json &JSON, const LLVMProjectIRDB &IRDB); + TaintConfigData(const nlohmann::json &Config, const LLVMProjectIRDB &IRDB); static TaintConfigData loadDataFromFile(const llvm::Twine &Path, const LLVMProjectIRDB &IRDB); void addDataToFile(const llvm::Twine &Path); + void addAllFunctions(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config); + void addAllVariables(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config); + inline void addSourceValue(std::string Value) { SourceValues.insert(std::move(Value)); } @@ -42,6 +48,12 @@ class TaintConfigData { SanitizerValues.insert(std::move(Value)); } + void addSourceValue(const llvm::Value *V); + void addSinkValue(const llvm::Value *V); + void addSanitizerValue(const llvm::Value *V); + void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); + void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); + void getValuesFromJSON(nlohmann::json JSON); inline const std::unordered_set &getSourceValues() const { diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 4b43ce6f2..e914e0073 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -29,11 +29,9 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, for (const auto &Value : Config.getSourceValues()) { SourceValues.insert(Code.getFunction(Value)); } - for (const auto &Value : Config.getSinkValues()) { SinkValues.insert(Code.getFunction(Value)); } - for (const auto &Value : Config.getSanitizerValues()) { SanitizerValues.insert(Code.getFunction(Value)); } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 9f0c328ea..4e290f2c3 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -2,9 +2,12 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IO.h" +#include "phasar/Utils/Logger.h" #include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" #include @@ -50,9 +53,49 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { return FnDefs; } -TaintConfigData::TaintConfigData(const nlohmann::json &JSON, - const LLVMProjectIRDB &IRDB) { - for (const auto &FunDesc : JSON["functions"]) { +void TaintConfigData::addSourceValue(const llvm::Value *V) { + SourceValues.insert(V->getName().str()); +} + +void TaintConfigData::addSinkValue(const llvm::Value *V) { + SinkValues.insert(V->getName().str()); +} + +void TaintConfigData::addSanitizerValue(const llvm::Value *V) { + SanitizerValues.insert(V->getName().str()); +} + +void TaintConfigData::addTaintCategory(const llvm::Value *Val, + TaintCategory Annotation) { + switch (Annotation) { + case TaintCategory::Source: + addSourceValue(Val); + break; + case TaintCategory::Sink: + addSinkValue(Val); + break; + case TaintCategory::Sanitizer: + addSanitizerValue(Val); + break; + default: + // ignore + break; + } +} + +void TaintConfigData::addTaintCategory(const llvm::Value *Val, + llvm::StringRef AnnotationStr) { + auto TC = toTaintCategory(AnnotationStr); + if (TC == TaintCategory::None) { + PHASAR_LOG_LEVEL(ERROR, "Unknown taint category: " << AnnotationStr); + } else { + addTaintCategory(Val, TC); + } +} + +void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config) { + for (const auto &FunDesc : Config["functions"]) { auto Name = FunDesc["name"].get(); auto FnDefs = findAllFunctionDefs(IRDB, Name); @@ -78,7 +121,7 @@ TaintConfigData::TaintConfigData(const nlohmann::json &JSON, // remaining parameters as well continue; } - SourceValues.insert(Fun->getArg(Idx)->getName().str()); + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); } } if (Params.contains("sink")) { @@ -91,12 +134,12 @@ TaintConfigData::TaintConfigData(const nlohmann::json &JSON, << Idx << "\n"; continue; } - SinkValues.insert(Fun->getArg(Idx)->getName().str()); + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } else if (Idx.is_string()) { const auto Sinks = Idx.get(); if (Sinks == "all") { for (const auto &Arg : Fun->args()) { - SinkValues.insert(Fun->getArg(Idx)->getName().str()); + addTaintCategory(&Arg, TaintCategory::Sink); } } } @@ -111,17 +154,98 @@ TaintConfigData::TaintConfigData(const nlohmann::json &JSON, << Idx << "\n"; continue; } - SanitizerValues.insert(Fun->getArg(Idx)->getName().str()); + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); } } } // handle a function's return value - /* if (FunDesc.contains("ret")) { for (const auto &User : Fun->users()) { - Data.addTaintValue(User, FunDesc["ret"].get()); + addTaintCategory(User, FunDesc["ret"].get()); } - }*/ + } + } +} + +void TaintConfigData::addAllVariables(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config) { + // scope can be a function name or a struct. + std::unordered_map StructConfigMap; + + // read all struct types from config + for (const auto &VarDesc : Config["variables"]) { + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &Ty : DIF.types()) { + if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && + Ty->getName().equals(VarDesc["scope"].get())) { + for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { + StructConfigMap.insert( + std::pair(LlvmStructTy, + VarDesc)); + } + } + } + DIF.reset(); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + for (const auto &VarDesc : Config["variables"]) { + for (const auto &Fun : IRDB.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().equals(VarDesc["name"].get()) && + LocalVar->getLine() == VarDesc["line"].get()) { + addTaintCategory(DbgDeclare->getAddress(), + VarDesc["cat"].get()); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + const auto VarDesc = StructConfigMap.at(StType); + auto VarName = VarDesc["name"].get(); + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { + addTaintCategory(Gep, VarDesc["cat"].get()); + } + } + } + } + } + } + } +} + +TaintConfigData::TaintConfigData(const nlohmann::json &Config, + const LLVMProjectIRDB &IRDB) { + // handle functions + if (Config.contains("functions")) { + addAllFunctions(IRDB, Config); + } + + // handle variables + if (Config.contains("variables")) { + addAllVariables(IRDB, Config); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + for (const auto &VarDesc : Config["variables"]) { + for (const auto &Fun : IRDB.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + // + } + } } } @@ -236,6 +360,10 @@ void TaintConfigData::getValuesFromJSON(nlohmann::json JSON) { void TaintConfigData::loadDataFromFileForThis(const llvm::Twine &Path, const LLVMProjectIRDB &IRDB) { nlohmann::json Config = readJsonFile(Path); + + if (!Config.contains("functions")) { + return; + } for (const auto &FunDesc : Config["functions"]) { auto Name = FunDesc["name"].get(); @@ -299,13 +427,6 @@ void TaintConfigData::loadDataFromFileForThis(const llvm::Twine &Path, } } } - // handle a function's return value - /* - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - Data.addTaintValue(User, FunDesc["ret"].get()); - } - }*/ } } diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 8f7c2aaff..0e3eda105 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -229,6 +229,7 @@ static constexpr auto PathToJsonTaintConfigTestCode = TEST_F(TaintConfigTest, Array_01_Json) { const std::string File = "array_01_c_dbg.ll"; const std::string Config = "array_01_config.json"; + auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); From d51bd3a114bf65bc3533cbd3354e4226bd440560 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Tue, 25 Jul 2023 12:36:56 +0200 Subject: [PATCH 07/26] new start w better approach --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 8 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 43 +- lib/Controller/AnalysisController.cpp | 24 +- lib/PhasarLLVM/TaintConfig/CMakeLists.txt | 2 - .../TaintConfig/LLVMTaintConfig.cpp | 125 +++++- .../TaintConfig/TaintConfigData.cpp | 394 +++++------------- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 6 +- .../Mono/InterMonoTaintAnalysisTest.cpp | 7 +- .../TaintConfig/TaintConfigTest.cpp | 25 +- 9 files changed, 261 insertions(+), 373 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index ddbe2bfe2..e74d4b1d5 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -31,8 +31,7 @@ class LLVMTaintConfig : public TaintConfigBase { friend TaintConfigBase; public: - explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const TaintConfigData &Config); + explicit LLVMTaintConfig(TaintConfigData &Config); explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode); explicit LLVMTaintConfig( TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, @@ -91,6 +90,11 @@ class LLVMTaintConfig : public TaintConfigBase { void printImpl(llvm::raw_ostream &OS) const; + // --- utilities + + void addAllFunctions(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config); + // --- data members std::unordered_set SourceValues; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index a0b272169..3dfa283c1 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -25,55 +25,34 @@ class TaintConfigData; class TaintConfigData { public: - TaintConfigData() = default; - TaintConfigData(const llvm::Twine &Path, const LLVMProjectIRDB &IRDB); - TaintConfigData(const nlohmann::json &Config, const LLVMProjectIRDB &IRDB); - - static TaintConfigData loadDataFromFile(const llvm::Twine &Path, - const LLVMProjectIRDB &IRDB); - void addDataToFile(const llvm::Twine &Path); - - void addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - void addAllVariables(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - - inline void addSourceValue(std::string Value) { - SourceValues.insert(std::move(Value)); - } - inline void addSinkValue(std::string Value) { - SinkValues.insert(std::move(Value)); - } - inline void addSanitizerValue(std::string Value) { - SanitizerValues.insert(std::move(Value)); - } + TaintConfigData(const LLVMProjectIRDB &IRDB, const nlohmann::json &Config); void addSourceValue(const llvm::Value *V); void addSinkValue(const llvm::Value *V); void addSanitizerValue(const llvm::Value *V); void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); + // --- utilities - void getValuesFromJSON(nlohmann::json JSON); + void addAllFunctions(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config); - inline const std::unordered_set &getSourceValues() const { + inline std::unordered_set getAllSourceValues() const { return SourceValues; } - inline const std::unordered_set &getSinkValues() const { + inline std::unordered_set getAllSinkValues() const { return SinkValues; } - inline const std::unordered_set &getSanitizerValues() const { + inline std::unordered_set getAllSanitizerValues() const { return SanitizerValues; } private: - void loadDataFromFileForThis(const llvm::Twine &Path, - const LLVMProjectIRDB &IRDB); - std::unordered_set SourceValues; - std::unordered_set SinkValues; - std::unordered_set SanitizerValues; + std::unordered_set SourceValues; + std::unordered_set SinkValues; + std::unordered_set SanitizerValues; }; } // namespace psr -#endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H \ No newline at end of file +#endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index f6f64e5bc..5f771265b 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -15,8 +15,8 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/HelperAnalyses.h" #include "phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" -#include "phasar/Utils/NlohmannLogging.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/STLExtras.h" @@ -28,8 +28,6 @@ #include #include -#include - namespace psr { AnalysisController::AnalysisController( @@ -187,10 +185,6 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { WithResultFileOrStdout("/psr-cg.txt", [this](auto &OS) { HA.getICFG().print(OS); }); } - if (EmitterOptions & AnalysisControllerEmitterOptions::EmitCGAsJson) { - WithResultFileOrStdout( - "/psr-cg.json", [this](auto &OS) { OS << HA.getICFG().getAsJson(); }); - } if (EmitterOptions & (AnalysisControllerEmitterOptions::EmitStatisticsAsJson | @@ -202,7 +196,15 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { if (EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsText) { - llvm::outs() << Stats << '\n'; + llvm::outs() << "Module " << IRDB.getModule()->getName() << ":\n"; + llvm::outs() << "> LLVM IR instructions:\t" << IRDB.getNumInstructions() + << "\n"; + llvm::outs() << "> Functions:\t\t" << IRDB.getModule()->size() << "\n"; + llvm::outs() << "> Global variables:\t" << IRDB.getModule()->global_size() + << "\n"; + llvm::outs() << "> Alloca instructions:\t" + << Stats.getAllocaInstructions().size() << "\n"; + llvm::outs() << "> Call Sites:\t\t" << Stats.getFunctioncalls() << "\n"; } if (EmitterOptions & @@ -216,10 +218,10 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { LLVMTaintConfig AnalysisController::makeTaintConfig() { std::string AnalysisConfigPath = !AnalysisConfigs.empty() ? AnalysisConfigs[0] : ""; + TaintConfigData IfAnalysisConfigPathIsEmpty = TaintConfigData( + HA.getProjectIRDB(), parseTaintConfig(AnalysisConfigPath)); return !AnalysisConfigPath.empty() - ? LLVMTaintConfig(HA.getProjectIRDB(), - TaintConfigData((llvm::Twine)AnalysisConfigPath, - HA.getProjectIRDB())) + ? LLVMTaintConfig(IfAnalysisConfigPathIsEmpty) : LLVMTaintConfig(HA.getProjectIRDB()); } diff --git a/lib/PhasarLLVM/TaintConfig/CMakeLists.txt b/lib/PhasarLLVM/TaintConfig/CMakeLists.txt index 81d9ad6a8..f795ea837 100644 --- a/lib/PhasarLLVM/TaintConfig/CMakeLists.txt +++ b/lib/PhasarLLVM/TaintConfig/CMakeLists.txt @@ -5,8 +5,6 @@ set(PHASAR_LINK_LIBS phasar_db phasar_llvm_db phasar_llvm_utils - phasar_controlflow - phasar_llvm_controlflow ) set(LLVM_LINK_COMPONENTS diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index e914e0073..6852eb1e3 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -9,13 +9,13 @@ #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/Annotation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -24,19 +24,120 @@ namespace psr { -LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const TaintConfigData &Config) { - for (const auto &Value : Config.getSourceValues()) { - SourceValues.insert(Code.getFunction(Value)); +static llvm::SmallVector +findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { + llvm::SmallVector FnDefs; + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &SubProgram : DIF.subprograms()) { + if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && + (SubProgram->getName() == Name || + SubProgram->getLinkageName() == Name)) { + FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); + } } - for (const auto &Value : Config.getSinkValues()) { - SinkValues.insert(Code.getFunction(Value)); + DIF.reset(); + + if (FnDefs.empty()) { + const auto *F = IRDB.getFunction(Name); + if (F) { + FnDefs.push_back(F); + } + } else if (FnDefs.size() > 1) { + llvm::errs() << "The function name '" << Name + << "' is ambiguous. Possible candidates are:\n"; + for (const auto *F : FnDefs) { + llvm::errs() << "> " << F->getName() << "\n"; + } + llvm::errs() << "Please further specify the function's name, such that it " + "becomes unambiguous\n"; } - for (const auto &Value : Config.getSanitizerValues()) { - SanitizerValues.insert(Code.getFunction(Value)); + + return FnDefs; +} + +void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config) { + for (const auto &FunDesc : Config["functions"]) { + auto Name = FunDesc["name"].get(); + + auto FnDefs = findAllFunctionDefs(IRDB, Name); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; + continue; + } + + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); + } + } + } + // handle a function's return value + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + addTaintCategory(User, FunDesc["ret"].get()); + } + } } } +LLVMTaintConfig::LLVMTaintConfig(TaintConfigData &Config) { + SinkValues = Config.getAllSinkValues(); + SourceValues = Config.getAllSourceValues(); + SanitizerValues = Config.getAllSanitizerValues(); +} + LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) { // handle "local" annotation declarations const auto *Annotation = AnnotatedCode.getFunction("llvm.var.annotation"); @@ -317,10 +418,8 @@ LLVMTaintConfig::makeInitialSeedsImpl() const { InitialSeeds[Inst].insert(Inst); } else if (const auto *Arg = llvm::dyn_cast(SourceValue); Arg && !Arg->getParent()->isDeclaration()) { - LLVMBasedCFG C; - for (const auto *SP : C.getStartPointsOf(Arg->getParent())) { - InitialSeeds[SP].insert(Arg); - } + const auto *FunFirstInst = &Arg->getParent()->getEntryBlock().front(); + InitialSeeds[FunFirstInst].insert(Arg); } } return InitialSeeds; diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 4e290f2c3..4c91de332 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -14,9 +14,74 @@ namespace psr { -TaintConfigData::TaintConfigData(const llvm::Twine &Path, - const LLVMProjectIRDB &IRDB) { - loadDataFromFileForThis(Path, IRDB); +TaintConfigData::TaintConfigData(const LLVMProjectIRDB &IRDB, + const nlohmann::json &Config) { + // handle functions + if (Config.contains("functions")) { + addAllFunctions(IRDB, Config); + } + + // handle variables + if (Config.contains("variables")) { + // scope can be a function name or a struct. + std::unordered_map + StructConfigMap; + + // read all struct types from config + for (const auto &VarDesc : Config["variables"]) { + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &Ty : DIF.types()) { + if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && + Ty->getName().equals(VarDesc["scope"].get())) { + for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { + StructConfigMap.insert( + std::pair( + LlvmStructTy, VarDesc)); + } + } + } + DIF.reset(); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + for (const auto &VarDesc : Config["variables"]) { + for (const auto &Fun : IRDB.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = + llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().equals( + VarDesc["name"].get()) && + LocalVar->getLine() == VarDesc["line"].get()) { + addTaintCategory(DbgDeclare->getAddress(), + VarDesc["cat"].get()); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + const auto VarDesc = StructConfigMap.at(StType); + auto VarName = VarDesc["name"].get(); + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { + addTaintCategory(Gep, VarDesc["cat"].get()); + } + } + } + } + } + } + } + } } static llvm::SmallVector @@ -53,46 +118,6 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { return FnDefs; } -void TaintConfigData::addSourceValue(const llvm::Value *V) { - SourceValues.insert(V->getName().str()); -} - -void TaintConfigData::addSinkValue(const llvm::Value *V) { - SinkValues.insert(V->getName().str()); -} - -void TaintConfigData::addSanitizerValue(const llvm::Value *V) { - SanitizerValues.insert(V->getName().str()); -} - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - TaintCategory Annotation) { - switch (Annotation) { - case TaintCategory::Source: - addSourceValue(Val); - break; - case TaintCategory::Sink: - addSinkValue(Val); - break; - case TaintCategory::Sanitizer: - addSanitizerValue(Val); - break; - default: - // ignore - break; - } -} - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - llvm::StringRef AnnotationStr) { - auto TC = toTaintCategory(AnnotationStr); - if (TC == TaintCategory::None) { - PHASAR_LOG_LEVEL(ERROR, "Unknown taint category: " << AnnotationStr); - } else { - addTaintCategory(Val, TC); - } -} - void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, const nlohmann::json &Config) { for (const auto &FunDesc : Config["functions"]) { @@ -167,267 +192,48 @@ void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, } } -void TaintConfigData::addAllVariables(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - // scope can be a function name or a struct. - std::unordered_map StructConfigMap; - - // read all struct types from config - for (const auto &VarDesc : Config["variables"]) { - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); +// +// --- Own API function implementations +// - DIF.processModule(*M); - for (const auto &Ty : DIF.types()) { - if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc["scope"].get())) { - for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { - StructConfigMap.insert( - std::pair(LlvmStructTy, - VarDesc)); - } - } - } - DIF.reset(); - } - - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config["variables"]) { - for (const auto &Fun : IRDB.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().equals(VarDesc["name"].get()) && - LocalVar->getLine() == VarDesc["line"].get()) { - addTaintCategory(DbgDeclare->getAddress(), - VarDesc["cat"].get()); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc["name"].get(); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { - addTaintCategory(Gep, VarDesc["cat"].get()); - } - } - } - } - } - } - } +void TaintConfigData::addSourceValue(const llvm::Value *V) { + SourceValues.insert(V); } -TaintConfigData::TaintConfigData(const nlohmann::json &Config, - const LLVMProjectIRDB &IRDB) { - // handle functions - if (Config.contains("functions")) { - addAllFunctions(IRDB, Config); - } - - // handle variables - if (Config.contains("variables")) { - addAllVariables(IRDB, Config); - } - - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config["variables"]) { - for (const auto &Fun : IRDB.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - // - } - } - } +void TaintConfigData::addSinkValue(const llvm::Value *V) { + SinkValues.insert(V); } -TaintConfigData TaintConfigData::loadDataFromFile(const llvm::Twine &Path, - const LLVMProjectIRDB &IRDB) { - TaintConfigData Data = TaintConfigData(); - nlohmann::json Config = readJsonFile(Path); - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); - - auto FnDefs = findAllFunctionDefs(IRDB, Name); - - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } - - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - Data.addSourceValue(Fun->getArg(Idx)->getName().str()); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - Data.addSinkValue(Fun->getArg(Idx)->getName().str()); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - Data.addSinkValue(Fun->getArg(Idx)->getName().str()); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - Data.addSanitizerValue(Fun->getArg(Idx)->getName().str()); - } - } - } - // handle a function's return value - /* - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - Data.addTaintValue(User, FunDesc["ret"].get()); - } - }*/ - } - - return Data; +void TaintConfigData::addSanitizerValue(const llvm::Value *V) { + SanitizerValues.insert(V); } -void TaintConfigData::addDataToFile(const llvm::Twine &Path) { - nlohmann::json Config; - - for (const auto &Source : SourceValues) { - Config.push_back({"SourceValues", {{Source}}}); - } - - for (const auto &Sink : SinkValues) { - Config.push_back({"SinkValues", {{Sink}}}); - } - - for (const auto &Sanitizer : SanitizerValues) { - Config.push_back({"SanitizerValues", {{Sanitizer}}}); - } - - std::error_code FileError; - llvm::raw_fd_ostream File(Path.str(), FileError); - - if (FileError) { - llvm::errs() << "Error while creating file: " << Path.str() << "\n"; +void TaintConfigData::addTaintCategory(const llvm::Value *Val, + llvm::StringRef AnnotationStr) { + auto TC = toTaintCategory(AnnotationStr); + if (TC == TaintCategory::None) { + PHASAR_LOG_LEVEL(ERROR, "Unknown taint category: " << AnnotationStr); + } else { + addTaintCategory(Val, TC); } - - File << Config; -} - -void TaintConfigData::getValuesFromJSON(nlohmann::json JSON) { - // TODO: } -void TaintConfigData::loadDataFromFileForThis(const llvm::Twine &Path, - const LLVMProjectIRDB &IRDB) { - nlohmann::json Config = readJsonFile(Path); - - if (!Config.contains("functions")) { - return; - } - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); - - auto FnDefs = findAllFunctionDefs(IRDB, Name); - - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } - - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - SourceValues.insert(Fun->getArg(Idx)->getName().str()); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - SinkValues.insert(Fun->getArg(Idx)->getName().str()); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - SinkValues.insert(Fun->getArg(Idx)->getName().str()); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - SanitizerValues.insert(Fun->getArg(Idx)->getName().str()); - } - } - } +void TaintConfigData::addTaintCategory(const llvm::Value *Val, + TaintCategory Annotation) { + switch (Annotation) { + case TaintCategory::Source: + addSourceValue(Val); + break; + case TaintCategory::Sink: + addSinkValue(Val); + break; + case TaintCategory::Sanitizer: + addSanitizerValue(Val); + break; + default: + // ignore + break; } } -} // namespace psr +} // namespace psr \ No newline at end of file diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 63ef715a8..8d7fc1144 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -64,9 +64,9 @@ class IDETaintAnalysisTest : public ::testing::Test { return LLVMTaintConfig(HA.getProjectIRDB()); }, [&](json *JS) { - auto Ret = LLVMTaintConfig( - HA.getProjectIRDB(), - TaintConfigData(*JS, HA.getProjectIRDB())); + TaintConfigData Data = + TaintConfigData(HA.getProjectIRDB(), *JS); + auto Ret = LLVMTaintConfig(Data); if (DumpResults) { llvm::errs() << Ret << "\n"; } diff --git a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp index 0335006cb..8f7ea392b 100644 --- a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp @@ -7,6 +7,7 @@ #include "phasar/PhasarLLVM/Passes/ValueAnnotationPass.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" @@ -34,9 +35,9 @@ class InterMonoTaintAnalysisTest : public ::testing::Test { auto ConfigPath = (PathToLlFiles + "config.json").str(); auto BuildPos = ConfigPath.rfind("/build/") + 1; ConfigPath.erase(BuildPos, 6); - LLVMTaintConfig TC( - HA.getProjectIRDB(), - TaintConfigData(parseTaintConfig(ConfigPath), HA.getProjectIRDB())); + TaintConfigData Data = + TaintConfigData(HA.getProjectIRDB(), parseTaintConfig(ConfigPath)); + LLVMTaintConfig TC(Data); TC.registerSinkCallBack([](const llvm::Instruction *Inst) { std::set Ret; if (const auto *Call = llvm::dyn_cast(Inst); diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 0e3eda105..530c74838 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -229,12 +229,11 @@ static constexpr auto PathToJsonTaintConfigTestCode = TEST_F(TaintConfigTest, Array_01_Json) { const std::string File = "array_01_c_dbg.ll"; const std::string Config = "array_01_config.json"; - auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -247,7 +246,7 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -259,7 +258,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const auto *Bar = IR.getFunction("bar"); assert(Bar); @@ -283,7 +282,7 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); const llvm::Value *I2 = IR.getInstruction(18); @@ -297,7 +296,7 @@ TEST_F(TaintConfigTest, Basic_03_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const auto *TaintPair = IR.getFunction("taintPair"); assert(TaintPair); @@ -315,7 +314,7 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); @@ -328,7 +327,7 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); @@ -343,7 +342,7 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { @@ -367,7 +366,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); @@ -399,7 +398,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); for (const auto *F : IR.getAllFunctions()) { @@ -421,7 +420,7 @@ TEST_F(TaintConfigTest, StaticFun_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); @@ -443,7 +442,7 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig, IR)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *CallInst = IR.getInstruction(13); const auto *I = llvm::dyn_cast(CallInst); From e6e2c305357e62db2508bc5485944c19956a311b Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 27 Jul 2023 14:25:58 +0200 Subject: [PATCH 08/26] basic working version --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 3 ++- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 4 +++- lib/Controller/AnalysisController.cpp | 7 ++---- .../TaintConfig/LLVMTaintConfig.cpp | 2 +- .../TaintConfig/TaintConfigData.cpp | 9 +++---- .../TaintConfig/TaintConfigTest.cpp | 24 +++++++++---------- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index e74d4b1d5..12f0f7f05 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -20,6 +20,7 @@ namespace psr { class LLVMTaintConfig; class LLVMProjectIRDB; +class TaintConfigData; template <> struct TaintConfigTraits { using n_t = const llvm::Instruction *; @@ -31,7 +32,7 @@ class LLVMTaintConfig : public TaintConfigBase { friend TaintConfigBase; public: - explicit LLVMTaintConfig(TaintConfigData &Config); + explicit LLVMTaintConfig(const psr::TaintConfigData &Config); explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode); explicit LLVMTaintConfig( TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 3dfa283c1..199f391e5 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -22,10 +22,12 @@ namespace psr { class TaintConfigData; +class LLVMProjectIRDB; class TaintConfigData { public: - TaintConfigData(const LLVMProjectIRDB &IRDB, const nlohmann::json &Config); + explicit TaintConfigData(const psr::LLVMProjectIRDB &IRDB, + const nlohmann::json &Config); void addSourceValue(const llvm::Value *V); void addSinkValue(const llvm::Value *V); diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index 5f771265b..519293db9 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -218,11 +218,8 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { LLVMTaintConfig AnalysisController::makeTaintConfig() { std::string AnalysisConfigPath = !AnalysisConfigs.empty() ? AnalysisConfigs[0] : ""; - TaintConfigData IfAnalysisConfigPathIsEmpty = TaintConfigData( - HA.getProjectIRDB(), parseTaintConfig(AnalysisConfigPath)); - return !AnalysisConfigPath.empty() - ? LLVMTaintConfig(IfAnalysisConfigPathIsEmpty) - : LLVMTaintConfig(HA.getProjectIRDB()); + + return LLVMTaintConfig(HA.getProjectIRDB()); } } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 6852eb1e3..dfb1ec899 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -132,7 +132,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, } } -LLVMTaintConfig::LLVMTaintConfig(TaintConfigData &Config) { +LLVMTaintConfig::LLVMTaintConfig(const TaintConfigData &Config) { SinkValues = Config.getAllSinkValues(); SourceValues = Config.getAllSourceValues(); SanitizerValues = Config.getAllSanitizerValues(); diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 4c91de332..78af9635c 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -1,5 +1,6 @@ #include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IO.h" #include "phasar/Utils/Logger.h" @@ -146,7 +147,7 @@ void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, // remaining parameters as well continue; } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); + addTaintCategory(Fun->getArg(Idx), "source"); } } if (Params.contains("sink")) { @@ -159,12 +160,12 @@ void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, << Idx << "\n"; continue; } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + addTaintCategory(Fun->getArg(Idx), "Sink"); } else if (Idx.is_string()) { const auto Sinks = Idx.get(); if (Sinks == "all") { for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); + addTaintCategory(&Arg, "sink"); } } } @@ -179,7 +180,7 @@ void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, << Idx << "\n"; continue; } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); + addTaintCategory(Fun->getArg(Idx), "sanitizer"); } } } diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 530c74838..d416dc2f2 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -233,7 +233,7 @@ TEST_F(TaintConfigTest, Array_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -246,7 +246,7 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -258,7 +258,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const auto *Bar = IR.getFunction("bar"); assert(Bar); @@ -282,7 +282,7 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); const llvm::Value *I2 = IR.getInstruction(18); @@ -296,7 +296,7 @@ TEST_F(TaintConfigTest, Basic_03_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const auto *TaintPair = IR.getFunction("taintPair"); assert(TaintPair); @@ -314,7 +314,7 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); @@ -327,7 +327,7 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); @@ -342,7 +342,7 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { @@ -366,7 +366,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); @@ -398,7 +398,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); for (const auto *F : IR.getAllFunctions()) { @@ -420,7 +420,7 @@ TEST_F(TaintConfigTest, StaticFun_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); @@ -442,7 +442,7 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); + psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *CallInst = IR.getInstruction(13); const auto *I = llvm::dyn_cast(CallInst); From 8ce7a13db77aae286764f197afb65f02f1d4965f Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 27 Jul 2023 14:33:40 +0200 Subject: [PATCH 09/26] removed unneccesary includes and functions --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 5 - .../TaintConfig/LLVMTaintConfig.cpp | 109 ------------------ 2 files changed, 114 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index 12f0f7f05..c2c56b7ea 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -91,11 +91,6 @@ class LLVMTaintConfig : public TaintConfigBase { void printImpl(llvm::raw_ostream &OS) const; - // --- utilities - - void addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - // --- data members std::unordered_set SourceValues; diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index dfb1ec899..ef56f86c4 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -15,7 +15,6 @@ #include "phasar/PhasarLLVM/Utils/Annotation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -24,114 +23,6 @@ namespace psr { -static llvm::SmallVector -findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { - llvm::SmallVector FnDefs; - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); - - DIF.processModule(*M); - for (const auto &SubProgram : DIF.subprograms()) { - if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && - (SubProgram->getName() == Name || - SubProgram->getLinkageName() == Name)) { - FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); - } - } - DIF.reset(); - - if (FnDefs.empty()) { - const auto *F = IRDB.getFunction(Name); - if (F) { - FnDefs.push_back(F); - } - } else if (FnDefs.size() > 1) { - llvm::errs() << "The function name '" << Name - << "' is ambiguous. Possible candidates are:\n"; - for (const auto *F : FnDefs) { - llvm::errs() << "> " << F->getName() << "\n"; - } - llvm::errs() << "Please further specify the function's name, such that it " - "becomes unambiguous\n"; - } - - return FnDefs; -} - -void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); - - auto FnDefs = findAllFunctionDefs(IRDB, Name); - - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } - - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); - } - } - } - // handle a function's return value - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc["ret"].get()); - } - } - } -} - LLVMTaintConfig::LLVMTaintConfig(const TaintConfigData &Config) { SinkValues = Config.getAllSinkValues(); SourceValues = Config.getAllSourceValues(); From 135a2b79f90dd051ebcf6ba421b064918fa6f324 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 4 Aug 2023 11:13:27 +0200 Subject: [PATCH 10/26] new TaintConfigData structure --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 9 +- .../PhasarLLVM/TaintConfig/TaintConfigBase.h | 7 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 61 ++-- lib/Controller/AnalysisController.cpp | 13 - .../TaintConfig/LLVMTaintConfig.cpp | 182 +++++++++- .../TaintConfig/TaintConfigBase.cpp | 41 +-- .../TaintConfig/TaintConfigData.cpp | 324 +++++++----------- 7 files changed, 347 insertions(+), 290 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index c2c56b7ea..9fda92774 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -11,7 +11,6 @@ #define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIG_H #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "llvm/IR/Instruction.h" @@ -32,7 +31,8 @@ class LLVMTaintConfig : public TaintConfigBase { friend TaintConfigBase; public: - explicit LLVMTaintConfig(const psr::TaintConfigData &Config); + explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const psr::TaintConfigData &Config); explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode); explicit LLVMTaintConfig( TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, @@ -91,6 +91,11 @@ class LLVMTaintConfig : public TaintConfigBase { void printImpl(llvm::raw_ostream &OS) const; + // --- utilities + + void addAllFunctions(const LLVMProjectIRDB &IRDB, + const TaintConfigData &Config); + // --- data members std::unordered_set SourceValues; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h index 94979d2af..478c8df9d 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h @@ -10,6 +10,7 @@ #ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/Utils/Nullable.h" #include "llvm/ADT/FunctionExtras.h" @@ -17,8 +18,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" -#include "nlohmann/json.hpp" - #include #include #include @@ -159,8 +158,8 @@ template class TaintConfigBase { //===----------------------------------------------------------------------===// // Miscellaneous helper functions -nlohmann::json parseTaintConfig(const llvm::Twine &Path); -std::optional parseTaintConfigOrNull(const llvm::Twine &Path); +TaintConfigData parseTaintConfig(const llvm::Twine &Path); +std::optional parseTaintConfigOrNull(const llvm::Twine &Path); } // namespace psr diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 199f391e5..317e9751d 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -10,49 +10,44 @@ #ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" - -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Value.h" - +#include #include -#include - namespace psr { class TaintConfigData; class LLVMProjectIRDB; class TaintConfigData { public: - explicit TaintConfigData(const psr::LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - - void addSourceValue(const llvm::Value *V); - void addSinkValue(const llvm::Value *V); - void addSanitizerValue(const llvm::Value *V); - void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); - void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); - // --- utilities - - void addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - - inline std::unordered_set getAllSourceValues() const { - return SourceValues; - } - inline std::unordered_set getAllSinkValues() const { - return SinkValues; - } - inline std::unordered_set getAllSanitizerValues() const { - return SanitizerValues; - } + TaintConfigData() = default; + explicit TaintConfigData(const std::string &Filepath); + + const std::unordered_set &getAllFunctionRets() const; + const std::unordered_set &getAllFunctionParamsSources() const; + const std::unordered_set &getAllFunctionParamsSinks() const; + const std::unordered_set &getAllFunctionParamsSanitizers() const; + + const std::unordered_set &getAllVariableScopes() const; + const std::unordered_set &getAllVariableLines() const; + const std::unordered_set &getAllVariableCats() const; + const std::unordered_set &getAllVariableNames() const; + + const std::unordered_set &getAllFunctions() const; + const std::unordered_set &getAllVariables() const; private: - std::unordered_set SourceValues; - std::unordered_set SinkValues; - std::unordered_set SanitizerValues; + std::unordered_set Functions; + std::unordered_set Variables; + + std::unordered_set FunctionRets; + std::unordered_set FunctionParamsSources; + std::unordered_set FunctionParamsSinks; + std::unordered_set FunctionParamsSanitizers; + + std::unordered_set VariableScopes; + std::unordered_set VariableLines; + std::unordered_set VariableCats; + std::unordered_set VariableNames; }; } // namespace psr diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index 519293db9..a782ef809 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -194,19 +194,6 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { GeneralStatisticsAnalysis GSA; const auto &Stats = GSA.runOnModule(*IRDB.getModule()); - if (EmitterOptions & - AnalysisControllerEmitterOptions::EmitStatisticsAsText) { - llvm::outs() << "Module " << IRDB.getModule()->getName() << ":\n"; - llvm::outs() << "> LLVM IR instructions:\t" << IRDB.getNumInstructions() - << "\n"; - llvm::outs() << "> Functions:\t\t" << IRDB.getModule()->size() << "\n"; - llvm::outs() << "> Global variables:\t" << IRDB.getModule()->global_size() - << "\n"; - llvm::outs() << "> Alloca instructions:\t" - << Stats.getAllocaInstructions().size() << "\n"; - llvm::outs() << "> Call Sites:\t\t" << Stats.getFunctioncalls() << "\n"; - } - if (EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsJson) { WithResultFileOrStdout("/psr-IrStatistics.json", diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index ef56f86c4..dd1604389 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -20,13 +20,187 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" namespace psr { -LLVMTaintConfig::LLVMTaintConfig(const TaintConfigData &Config) { - SinkValues = Config.getAllSinkValues(); - SourceValues = Config.getAllSourceValues(); - SanitizerValues = Config.getAllSanitizerValues(); +static llvm::SmallVector +findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { + llvm::SmallVector FnDefs; + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &SubProgram : DIF.subprograms()) { + if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && + (SubProgram->getName() == Name || + SubProgram->getLinkageName() == Name)) { + FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); + } + } + DIF.reset(); + + if (FnDefs.empty()) { + const auto *F = IRDB.getFunction(Name); + if (F) { + FnDefs.push_back(F); + } + } else if (FnDefs.size() > 1) { + llvm::errs() << "The function name '" << Name + << "' is ambiguous. Possible candidates are:\n"; + for (const auto *F : FnDefs) { + llvm::errs() << "> " << F->getName() << "\n"; + } + llvm::errs() << "Please further specify the function's name, such that it " + "becomes unambiguous\n"; + } + + return FnDefs; +} + +void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, + const TaintConfigData &Config) { + for (const auto &Source : Config.getAllFunctionParamsSources()) { + auto FnDefs = findAllFunctionDefs(IRDB, Source); + } + + for (const auto &FunDesc : Config.getAllFunctions()) { + auto FnDefs = findAllFunctionDefs(IRDB, FunDesc); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << FunDesc << "\n"; + continue; + } + + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); + } + } + } + // handle a function's return value + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + addTaintCategory(User, FunDesc["ret"].get()); + } + } + } +} + +LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const psr::TaintConfigData &Config) { + // handle functions + if (Config.hasFunctions()) { + addAllFunctions(Code, Config); + } + + // handle variables + if (Config.hasVariables()) { + // scope can be a function name or a struct. + std::unordered_map + StructConfigMap; + + // read all struct types from config + for (const auto &VarDesc : Config.getAllVariables()) { + llvm::DebugInfoFinder DIF; + const auto *M = Code.getModule(); + + DIF.processModule(*M); + for (const auto &Ty : DIF.types()) { + if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && + Ty->getName().equals(VarDesc["scope"].get())) { + for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { + StructConfigMap.insert( + std::pair( + LlvmStructTy, VarDesc)); + } + } + } + DIF.reset(); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + for (const auto &VarDesc : Config.getAllVariables()) { + for (const auto &Fun : Code.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = + llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().equals(VarDesc) && + LocalVar->getLine() == VarDesc["line"].get()) { + addTaintCategory(DbgDeclare->getAddress(), + VarDesc["cat"].get()); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + const auto VarDesc = StructConfigMap.at(StType); + auto VarName = VarDesc["name"].get(); + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { + addTaintCategory(Gep, VarDesc["cat"].get()); + } + } + } + } + } + } + } + } } LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) { diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 34e2d2556..01e462415 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -7,9 +7,6 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include "nlohmann/json-schema.hpp" -#include "nlohmann/json.hpp" - #include llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { @@ -34,46 +31,10 @@ psr::TaintCategory psr::toTaintCategory(llvm::StringRef Str) noexcept { .Default(TaintCategory::None); } -nlohmann::json psr::parseTaintConfig(const llvm::Twine &Path) { +psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { auto Ret = parseTaintConfigOrNull(Path); if (!Ret) { return {}; } return std::move(*Ret); } - -std::optional -psr::parseTaintConfigOrNull(const llvm::Twine &Path) { - std::optional TaintConfig = readJsonFile(Path); - nlohmann::json_schema::json_validator Validator; - try { - static const nlohmann::json TaintConfigSchema = -#include "../config/TaintConfigSchema.json" - ; - - Validator.set_root_schema(TaintConfigSchema); // insert root-schema - } catch (const std::exception &E) { - PHASAR_LOG_LEVEL(ERROR, - "Validation of schema failed, here is why: " << E.what()); - return std::nullopt; - } - - // a custom error handler - class CustomJsonErrorHandler - : public nlohmann::json_schema::basic_error_handler { - void error(const nlohmann::json::json_pointer &Pointer, - const nlohmann::json &Instance, - const std::string &Message) override { - nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, - Message); - PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() - << "' - '" << Instance << "': " << Message); - } - }; - CustomJsonErrorHandler Err; - Validator.validate(*TaintConfig, Err); - if (Err) { - TaintConfig.reset(); - } - return TaintConfig; -} diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 78af9635c..1422cd5d3 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -11,230 +11,166 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" +#include "nlohmann/json-schema.hpp" +#include "nlohmann/json.hpp" + #include namespace psr { -TaintConfigData::TaintConfigData(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - // handle functions - if (Config.contains("functions")) { - addAllFunctions(IRDB, Config); +std::optional +parseTaintConfigOrNull(const llvm::Twine &Path) { + std::optional TaintConfig = readJsonFile(Path); + nlohmann::json_schema::json_validator Validator; + try { + static const nlohmann::json TaintConfigSchema = +#include "../config/TaintConfigSchema.json" + ; + + Validator.set_root_schema(TaintConfigSchema); // insert root-schema + } catch (const std::exception &E) { + PHASAR_LOG_LEVEL(ERROR, + "Validation of schema failed, here is why: " << E.what()); + return std::nullopt; } - // handle variables - if (Config.contains("variables")) { - // scope can be a function name or a struct. - std::unordered_map - StructConfigMap; - - // read all struct types from config - for (const auto &VarDesc : Config["variables"]) { - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); - - DIF.processModule(*M); - for (const auto &Ty : DIF.types()) { - if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc["scope"].get())) { - for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { - StructConfigMap.insert( - std::pair( - LlvmStructTy, VarDesc)); - } - } - } - DIF.reset(); + // a custom error handler + class CustomJsonErrorHandler + : public nlohmann::json_schema::basic_error_handler { + void error(const nlohmann::json::json_pointer &Pointer, + const nlohmann::json &Instance, + const std::string &Message) override { + nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, + Message); + PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() + << "' - '" << Instance << "': " << Message); } + }; + CustomJsonErrorHandler Err; + Validator.validate(*TaintConfig, Err); + if (Err) { + TaintConfig.reset(); + } + return std::optional(Path.str()); +} - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config["variables"]) { - for (const auto &Fun : IRDB.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = - llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().equals( - VarDesc["name"].get()) && - LocalVar->getLine() == VarDesc["line"].get()) { - addTaintCategory(DbgDeclare->getAddress(), - VarDesc["cat"].get()); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc["name"].get(); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { - addTaintCategory(Gep, VarDesc["cat"].get()); - } - } - } - } - } - } +void findAndAddValue(const nlohmann::json &Config, const std::string &Value, + std::unordered_set &Container) { + if (Config.contains(Value)) { + for (const auto &Curr : Config[Value]) { + Container.insert(Curr); } } } -static llvm::SmallVector -findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { - llvm::SmallVector FnDefs; - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); +void addAllFunctionRets(const nlohmann::json &Function, + std::unordered_set &Container) { + findAndAddValue(Function, "ret", Container); +} - DIF.processModule(*M); - for (const auto &SubProgram : DIF.subprograms()) { - if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && - (SubProgram->getName() == Name || - SubProgram->getLinkageName() == Name)) { - FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); - } - } - DIF.reset(); +void addAllFunctionParamsSources(const nlohmann::json &Param, + std::unordered_set &Container) { + findAndAddValue(Param, "source", Container); +} - if (FnDefs.empty()) { - const auto *F = IRDB.getFunction(Name); - if (F) { - FnDefs.push_back(F); - } - } else if (FnDefs.size() > 1) { - llvm::errs() << "The function name '" << Name - << "' is ambiguous. Possible candidates are:\n"; - for (const auto *F : FnDefs) { - llvm::errs() << "> " << F->getName() << "\n"; - } - llvm::errs() << "Please further specify the function's name, such that it " - "becomes unambiguous\n"; - } +void addAllFunctionParamsSinks(const nlohmann::json &Param, + std::unordered_set &Container) { + findAndAddValue(Param, "sink", Container); +} - return FnDefs; +void addAllFunctionParamsSanitizers( + const nlohmann::json &Param, std::unordered_set &Container) { + findAndAddValue(Param, "sanitizer", Container); } -void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); +void addAllVariableScopes(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "scope", Container); +} - auto FnDefs = findAllFunctionDefs(IRDB, Name); +void addAllVariableLines(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "line", Container); +} - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } +void addAllVariableCats(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "cat", Container); +} - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - addTaintCategory(Fun->getArg(Idx), "source"); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), "Sink"); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, "sink"); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), "sanitizer"); - } +void addAllVariableNames(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "name", Container); +} + +TaintConfigData::TaintConfigData(const std::string &Filepath) { + + nlohmann::json Config(Filepath); + + // handle functions + if (Config.contains("functions")) { + for (auto &Function : Config["functions"]) { + addAllFunctionRets(Function, FunctionRets); + + if (Function.contains("params")) { + addAllFunctionParamsSources(Function["params"], FunctionParamsSources); + addAllFunctionParamsSinks(Function["params"], FunctionParamsSinks); + addAllFunctionParamsSanitizers(Function["params"], + FunctionParamsSanitizers); } } - // handle a function's return value - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc["ret"].get()); - } + } + + // handle variables + if (Config.contains("variables")) { + for (auto &Variable : Config["variables"]) { + addAllVariableScopes(Variable, VariableScopes); + addAllVariableLines(Variable, VariableLines); + addAllVariableCats(Variable, VariableCats); + addAllVariableNames(Variable, VariableNames); } } } -// -// --- Own API function implementations -// - -void TaintConfigData::addSourceValue(const llvm::Value *V) { - SourceValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctions() const { + return Functions; } - -void TaintConfigData::addSinkValue(const llvm::Value *V) { - SinkValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctionRets() const { + return FunctionRets; } - -void TaintConfigData::addSanitizerValue(const llvm::Value *V) { - SanitizerValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSources() const { + return FunctionParamsSources; } - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - llvm::StringRef AnnotationStr) { - auto TC = toTaintCategory(AnnotationStr); - if (TC == TaintCategory::None) { - PHASAR_LOG_LEVEL(ERROR, "Unknown taint category: " << AnnotationStr); - } else { - addTaintCategory(Val, TC); - } +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSinks() const { + return FunctionParamsSinks; } - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - TaintCategory Annotation) { - switch (Annotation) { - case TaintCategory::Source: - addSourceValue(Val); - break; - case TaintCategory::Sink: - addSinkValue(Val); - break; - case TaintCategory::Sanitizer: - addSanitizerValue(Val); - break; - default: - // ignore - break; - } +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSanitizers() const { + return FunctionParamsSanitizers; +} +const std::unordered_set & +TaintConfigData::getAllVariables() const { + return Variables; +} +const std::unordered_set & +TaintConfigData::getAllVariableScopes() const { + return VariableScopes; +} +const std::unordered_set & +TaintConfigData::getAllVariableLines() const { + return VariableLines; +} +const std::unordered_set & +TaintConfigData::getAllVariableCats() const { + return VariableCats; +} +const std::unordered_set & +TaintConfigData::getAllVariableNames() const { + return VariableNames; } } // namespace psr \ No newline at end of file From 4ed0efd8261bdd11648c18c57480ffdd83ea6898 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 4 Aug 2023 16:09:51 +0200 Subject: [PATCH 11/26] fully refactored, doesn't compile --- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 51 ++--- .../TaintConfig/LLVMTaintConfig.cpp | 181 ++++++------------ .../TaintConfig/TaintConfigData.cpp | 113 +++++------ .../Problems/IDEExtendedTaintAnalysisTest.cpp | 6 +- .../Mono/InterMonoTaintAnalysisTest.cpp | 5 +- .../TaintConfig/TaintConfigTest.cpp | 24 +-- 6 files changed, 153 insertions(+), 227 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 317e9751d..d9546ae1b 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -11,7 +11,7 @@ #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H #include -#include +#include namespace psr { class TaintConfigData; @@ -22,32 +22,37 @@ class TaintConfigData { TaintConfigData() = default; explicit TaintConfigData(const std::string &Filepath); - const std::unordered_set &getAllFunctionRets() const; - const std::unordered_set &getAllFunctionParamsSources() const; - const std::unordered_set &getAllFunctionParamsSinks() const; - const std::unordered_set &getAllFunctionParamsSanitizers() const; + [[nodiscard]] const std::vector &getAllFunctionNames() const; + [[nodiscard]] const std::vector &getAllFunctionRets() const; + [[nodiscard]] const std::vector & + getAllFunctionParamsSources() const; + [[nodiscard]] const std::vector & + getAllFunctionParamsSinks() const; + [[nodiscard]] const std::vector & + getAllFunctionParamsSanitizers() const; - const std::unordered_set &getAllVariableScopes() const; - const std::unordered_set &getAllVariableLines() const; - const std::unordered_set &getAllVariableCats() const; - const std::unordered_set &getAllVariableNames() const; + [[nodiscard]] const std::vector &getAllVariableScopes() const; + [[nodiscard]] const std::vector &getAllVariableLines() const; + [[nodiscard]] const std::vector &getAllVariableCats() const; + [[nodiscard]] const std::vector &getAllVariableNames() const; - const std::unordered_set &getAllFunctions() const; - const std::unordered_set &getAllVariables() const; + [[nodiscard]] const std::vector &getAllFunctions() const; + [[nodiscard]] const std::vector &getAllVariables() const; private: - std::unordered_set Functions; - std::unordered_set Variables; - - std::unordered_set FunctionRets; - std::unordered_set FunctionParamsSources; - std::unordered_set FunctionParamsSinks; - std::unordered_set FunctionParamsSanitizers; - - std::unordered_set VariableScopes; - std::unordered_set VariableLines; - std::unordered_set VariableCats; - std::unordered_set VariableNames; + std::vector Functions; + std::vector Variables; + + std::vector FunctionNames; + std::vector FunctionRets; + std::vector FunctionParamSources; + std::vector FunctionParamSinks; + std::vector FunctionParamSanitizers; + + std::vector VariableScopes; + std::vector VariableLines; + std::vector VariableCats; + std::vector VariableNames; }; } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index dd1604389..ccba790c3 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -11,16 +11,17 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/Annotation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Value.h" + +#include namespace psr { @@ -60,145 +61,75 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const TaintConfigData &Config) { - for (const auto &Source : Config.getAllFunctionParamsSources()) { - auto FnDefs = findAllFunctionDefs(IRDB, Source); - } - + int Counter = -1; for (const auto &FunDesc : Config.getAllFunctions()) { - auto FnDefs = findAllFunctionDefs(IRDB, FunDesc); + Counter++; + auto Name = Config.getAllFunctionNames()[Counter]; + + auto FnDefs = findAllFunctionDefs(IRDB, Name); if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << FunDesc << "\n"; + llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; continue; } const auto *Fun = FnDefs[0]; - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); - } + // handle a function's source parameters + for (const auto &Param : Config.getAllFunctionParamsSources()) { + unsigned Idx = std::stoi(Param); + + if (Idx >= Fun->arg_size()) { + llvm::errs() << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } - } - } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); + } + for (const auto &Param : Config.getAllFunctionParamsSinks()) { + char *Check; + long Converted = strtol(Param.c_str(), &Check, Param.size()); + + if (!Check) { + unsigned Idx = std::stoi(Param); + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + } else { + if (Param == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); } } } - // handle a function's return value - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc["ret"].get()); - } - } - } -} - -LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, - const psr::TaintConfigData &Config) { - // handle functions - if (Config.hasFunctions()) { - addAllFunctions(Code, Config); - } - - // handle variables - if (Config.hasVariables()) { - // scope can be a function name or a struct. - std::unordered_map - StructConfigMap; - - // read all struct types from config - for (const auto &VarDesc : Config.getAllVariables()) { - llvm::DebugInfoFinder DIF; - const auto *M = Code.getModule(); - - DIF.processModule(*M); - for (const auto &Ty : DIF.types()) { - if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc["scope"].get())) { - for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { - StructConfigMap.insert( - std::pair( - LlvmStructTy, VarDesc)); - } + for (const auto &Param : Config.getAllFunctionParamsSanitizers()) { + char *Check; + long Converted = strtol(Param.c_str(), &Check, Param.size()); + unsigned Idx = std::stoi(Param); + + if (!Check) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); } - DIF.reset(); } - - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config.getAllVariables()) { - for (const auto &Fun : Code.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = - llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().equals(VarDesc) && - LocalVar->getLine() == VarDesc["line"].get()) { - addTaintCategory(DbgDeclare->getAddress(), - VarDesc["cat"].get()); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc["name"].get(); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { - addTaintCategory(Gep, VarDesc["cat"].get()); - } - } - } - } - } - } + // handle a function's return value + for (const auto &User : Fun->users()) { + addTaintCategory(User, Config.getAllFunctionRets()[Counter]); } } } @@ -514,4 +445,4 @@ void LLVMTaintConfig::printImpl(llvm::raw_ostream &OS) const { } template class TaintConfigBase; -} // namespace psr +} // namespace psr \ No newline at end of file diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 1422cd5d3..efd7685f5 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -55,121 +55,112 @@ parseTaintConfigOrNull(const llvm::Twine &Path) { } void findAndAddValue(const nlohmann::json &Config, const std::string &Value, - std::unordered_set &Container) { + std::vector &Container) { if (Config.contains(Value)) { for (const auto &Curr : Config[Value]) { - Container.insert(Curr); + Container.push_back(Curr); } } } +void addAllFunctions(const nlohmann::json &Config, + std::vector &Container) { + findAndAddValue(Config, "functions", Container); +} + +void addAllFunctionNames(const nlohmann::json &Function, + std::vector &Container) { + findAndAddValue(Function, "name", Container); +} + void addAllFunctionRets(const nlohmann::json &Function, - std::unordered_set &Container) { + std::vector &Container) { findAndAddValue(Function, "ret", Container); } void addAllFunctionParamsSources(const nlohmann::json &Param, - std::unordered_set &Container) { + std::vector &Container) { findAndAddValue(Param, "source", Container); } void addAllFunctionParamsSinks(const nlohmann::json &Param, - std::unordered_set &Container) { + std::vector &Container) { findAndAddValue(Param, "sink", Container); } -void addAllFunctionParamsSanitizers( - const nlohmann::json &Param, std::unordered_set &Container) { +void addAllFunctionParamsSanitizers(const nlohmann::json &Param, + std::vector &Container) { findAndAddValue(Param, "sanitizer", Container); } -void addAllVariableScopes(const nlohmann::json &Variable, - std::unordered_set &Container) { - findAndAddValue(Variable, "scope", Container); -} - -void addAllVariableLines(const nlohmann::json &Variable, - std::unordered_set &Container) { - findAndAddValue(Variable, "line", Container); -} - -void addAllVariableCats(const nlohmann::json &Variable, - std::unordered_set &Container) { - findAndAddValue(Variable, "cat", Container); -} - -void addAllVariableNames(const nlohmann::json &Variable, - std::unordered_set &Container) { - findAndAddValue(Variable, "name", Container); -} - TaintConfigData::TaintConfigData(const std::string &Filepath) { nlohmann::json Config(Filepath); // handle functions if (Config.contains("functions")) { - for (auto &Function : Config["functions"]) { - addAllFunctionRets(Function, FunctionRets); - - if (Function.contains("params")) { - addAllFunctionParamsSources(Function["params"], FunctionParamsSources); - addAllFunctionParamsSinks(Function["params"], FunctionParamsSinks); - addAllFunctionParamsSanitizers(Function["params"], - FunctionParamsSanitizers); - } + for (const auto &Func : Config["functions"]) { + // A functions name should be at the same index in the names array and the + // functions array + Functions.push_back(Func); + + findAndAddValue(Func, "name", FunctionNames); + findAndAddValue(Func, "ret", FunctionRets); + findAndAddValue(Func["params"], "source", FunctionParamSources); + findAndAddValue(Func["params"], "sink", FunctionParamSinks); + findAndAddValue(Func["params"], "sanitizer", FunctionParamSanitizers); } } // handle variables if (Config.contains("variables")) { - for (auto &Variable : Config["variables"]) { - addAllVariableScopes(Variable, VariableScopes); - addAllVariableLines(Variable, VariableLines); - addAllVariableCats(Variable, VariableCats); - addAllVariableNames(Variable, VariableNames); + for (const auto &Var : Config["variables"]) { + // A variables name should be at the same index in the names array and the + // variables array + Variables.push_back(Var); + + findAndAddValue(Config["variables"], "name", VariableNames); + findAndAddValue(Config["variables"], "scope", Variables); + findAndAddValue(Config["variables"], "line", VariableLines); + findAndAddValue(Config["variables"], "cat", VariableCats); } } } -const std::unordered_set & -TaintConfigData::getAllFunctions() const { +const std::vector &TaintConfigData::getAllFunctions() const { return Functions; } -const std::unordered_set & -TaintConfigData::getAllFunctionRets() const { +const std::vector &TaintConfigData::getAllFunctionNames() const { + return FunctionNames; +} +const std::vector &TaintConfigData::getAllFunctionRets() const { return FunctionRets; } -const std::unordered_set & +const std::vector & TaintConfigData::getAllFunctionParamsSources() const { - return FunctionParamsSources; + return FunctionParamSources; } -const std::unordered_set & +const std::vector & TaintConfigData::getAllFunctionParamsSinks() const { - return FunctionParamsSinks; + return FunctionParamSinks; } -const std::unordered_set & +const std::vector & TaintConfigData::getAllFunctionParamsSanitizers() const { - return FunctionParamsSanitizers; + return FunctionParamSanitizers; } -const std::unordered_set & -TaintConfigData::getAllVariables() const { +const std::vector &TaintConfigData::getAllVariables() const { return Variables; } -const std::unordered_set & -TaintConfigData::getAllVariableScopes() const { +const std::vector &TaintConfigData::getAllVariableScopes() const { return VariableScopes; } -const std::unordered_set & -TaintConfigData::getAllVariableLines() const { +const std::vector &TaintConfigData::getAllVariableLines() const { return VariableLines; } -const std::unordered_set & -TaintConfigData::getAllVariableCats() const { +const std::vector &TaintConfigData::getAllVariableCats() const { return VariableCats; } -const std::unordered_set & -TaintConfigData::getAllVariableNames() const { +const std::vector &TaintConfigData::getAllVariableNames() const { return VariableNames; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 8d7fc1144..43396faf5 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -64,9 +64,9 @@ class IDETaintAnalysisTest : public ::testing::Test { return LLVMTaintConfig(HA.getProjectIRDB()); }, [&](json *JS) { - TaintConfigData Data = - TaintConfigData(HA.getProjectIRDB(), *JS); - auto Ret = LLVMTaintConfig(Data); + TaintConfigData Data = TaintConfigData(*JS); + auto Ret = + LLVMTaintConfig(HA.getProjectIRDB(), Data); if (DumpResults) { llvm::errs() << Ret << "\n"; } diff --git a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp index 8f7ea392b..94ce9b1fb 100644 --- a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp @@ -35,9 +35,8 @@ class InterMonoTaintAnalysisTest : public ::testing::Test { auto ConfigPath = (PathToLlFiles + "config.json").str(); auto BuildPos = ConfigPath.rfind("/build/") + 1; ConfigPath.erase(BuildPos, 6); - TaintConfigData Data = - TaintConfigData(HA.getProjectIRDB(), parseTaintConfig(ConfigPath)); - LLVMTaintConfig TC(Data); + TaintConfigData Data = TaintConfigData(parseTaintConfig(ConfigPath)); + LLVMTaintConfig TC(HA.getProjectIRDB(), Data); TC.registerSinkCallBack([](const llvm::Instruction *Inst) { std::set Ret; if (const auto *Call = llvm::dyn_cast(Inst); diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index d416dc2f2..8caadf83f 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -233,7 +233,7 @@ TEST_F(TaintConfigTest, Array_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -246,7 +246,7 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -258,7 +258,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const auto *Bar = IR.getFunction("bar"); assert(Bar); @@ -282,7 +282,7 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); const llvm::Value *I2 = IR.getInstruction(18); @@ -296,7 +296,7 @@ TEST_F(TaintConfigTest, Basic_03_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const auto *TaintPair = IR.getFunction("taintPair"); assert(TaintPair); @@ -314,7 +314,7 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); @@ -327,7 +327,7 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); @@ -342,7 +342,7 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { @@ -366,7 +366,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); @@ -398,7 +398,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); for (const auto *F : IR.getAllFunctions()) { @@ -420,7 +420,7 @@ TEST_F(TaintConfigTest, StaticFun_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); @@ -442,7 +442,7 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(psr::TaintConfigData(IR, JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); llvm::outs() << TConfig << '\n'; const llvm::Value *CallInst = IR.getInstruction(13); const auto *I = llvm::dyn_cast(CallInst); From 39c1ca284f19ed05548e5709827c1816d6bf8c20 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 17 Aug 2023 11:17:55 +0200 Subject: [PATCH 12/26] added func/var structs --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 1 + .../PhasarLLVM/TaintConfig/TaintConfigData.h | 57 ++++---- .../TaintConfig/LLVMTaintConfig.cpp | 128 ++++++++++++------ .../TaintConfig/TaintConfigData.cpp | 102 +++++++------- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 3 +- 5 files changed, 161 insertions(+), 130 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index 9fda92774..3b403e332 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -15,6 +15,7 @@ #include "llvm/IR/Instruction.h" #include +#include namespace psr { class LLVMTaintConfig; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index d9546ae1b..85cf0131b 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -14,45 +14,34 @@ #include namespace psr { -class TaintConfigData; +struct TaintConfigData; class LLVMProjectIRDB; -class TaintConfigData { -public: +struct FunctionData { + FunctionData() = default; + + std::string Name; + std::string ReturnType; + std::vector SourceValues; + std::vector SinkValues; + std::vector SanitizerValues; +}; + +struct VariableData { + VariableData() = default; + + size_t Line{}; + std::string Name; + std::string Scope; + std::string Cat; +}; + +struct TaintConfigData { TaintConfigData() = default; explicit TaintConfigData(const std::string &Filepath); - [[nodiscard]] const std::vector &getAllFunctionNames() const; - [[nodiscard]] const std::vector &getAllFunctionRets() const; - [[nodiscard]] const std::vector & - getAllFunctionParamsSources() const; - [[nodiscard]] const std::vector & - getAllFunctionParamsSinks() const; - [[nodiscard]] const std::vector & - getAllFunctionParamsSanitizers() const; - - [[nodiscard]] const std::vector &getAllVariableScopes() const; - [[nodiscard]] const std::vector &getAllVariableLines() const; - [[nodiscard]] const std::vector &getAllVariableCats() const; - [[nodiscard]] const std::vector &getAllVariableNames() const; - - [[nodiscard]] const std::vector &getAllFunctions() const; - [[nodiscard]] const std::vector &getAllVariables() const; - -private: - std::vector Functions; - std::vector Variables; - - std::vector FunctionNames; - std::vector FunctionRets; - std::vector FunctionParamSources; - std::vector FunctionParamSinks; - std::vector FunctionParamSanitizers; - - std::vector VariableScopes; - std::vector VariableLines; - std::vector VariableCats; - std::vector VariableNames; + std::vector Functions; + std::vector Variables; }; } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index ccba790c3..cce410357 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -14,7 +14,6 @@ #include "phasar/PhasarLLVM/Utils/Annotation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/NlohmannLogging.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -62,9 +61,9 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const TaintConfigData &Config) { int Counter = -1; - for (const auto &FunDesc : Config.getAllFunctions()) { + for (const auto &FunDesc : Config.Functions) { Counter++; - auto Name = Config.getAllFunctionNames()[Counter]; + auto Name = FunDesc.Name; auto FnDefs = findAllFunctionDefs(IRDB, Name); @@ -76,9 +75,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const auto *Fun = FnDefs[0]; // handle a function's source parameters - for (const auto &Param : Config.getAllFunctionParamsSources()) { - unsigned Idx = std::stoi(Param); - + for (const auto &Idx : FunDesc.SourceValues) { if (Idx >= Fun->arg_size()) { llvm::errs() << "ERROR: The source-function parameter index is out of " "bounds: " @@ -89,47 +86,96 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, } addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); } - for (const auto &Param : Config.getAllFunctionParamsSinks()) { - char *Check; - long Converted = strtol(Param.c_str(), &Check, Param.size()); - - if (!Check) { - unsigned Idx = std::stoi(Param); - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); - } else { - if (Param == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } - } + for (const auto &Idx : FunDesc.SinkValues) { + if (Idx >= Fun->arg_size()) { + llvm::errs() << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } - for (const auto &Param : Config.getAllFunctionParamsSanitizers()) { - char *Check; - long Converted = strtol(Param.c_str(), &Check, Param.size()); - unsigned Idx = std::stoi(Param); - - if (!Check) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); + + for (const auto &Idx : FunDesc.SanitizerValues) { + if (Idx >= Fun->arg_size()) { + llvm::errs() << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); } // handle a function's return value for (const auto &User : Fun->users()) { - addTaintCategory(User, Config.getAllFunctionRets()[Counter]); + addTaintCategory(User, FunDesc.ReturnType); + } + } +} + +LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const TaintConfigData &Config) { + // handle functions + addAllFunctions(Code, Config); + + // handle variables + // scope can be a function name or a struct. + std::unordered_map StructConfigMap; + + // read all struct types from config + size_t Counter = 0; + for (const auto &VarDesc : Config.Variables) { + llvm::DebugInfoFinder DIF; + const auto *M = Code.getModule(); + + DIF.processModule(*M); + for (const auto &Ty : DIF.types()) { + if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && + Ty->getName().equals(VarDesc.Name)) { + for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { + StructConfigMap.insert( + std::pair(LlvmStructTy, + VarDesc.Name)); + } + } + } + DIF.reset(); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + const auto &ConfigFunctions = Config.getAllFunctions(); + const auto &ConfigFunctionNames = Config.getAllFunctionNames(); + const auto &ConfigVariableLine = Config.getAllVariableLines(); + const auto &ConfigVariableCat = Config.getAllVariableCats(); + int Iter = -1; + for (const auto &Fun : Code.getAllFunctions()) { + Iter++; + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().str() == ConfigFunctionNames.at(Iter) && + (std::to_string(LocalVar->getLine()) == + ConfigVariableLine.at(Iter))) { + addTaintCategory(DbgDeclare->getAddress(), + ConfigVariableCat.at(Iter)); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + const auto VarDesc = StructConfigMap.at(StType); + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarDesc.size()).equals(VarDesc)) { + addTaintCategory(Gep, VarDesc); + } + } + } + } } } } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index efd7685f5..1178b65a5 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -100,68 +100,62 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { // handle functions if (Config.contains("functions")) { for (const auto &Func : Config["functions"]) { - // A functions name should be at the same index in the names array and the - // functions array - Functions.push_back(Func); - - findAndAddValue(Func, "name", FunctionNames); - findAndAddValue(Func, "ret", FunctionRets); - findAndAddValue(Func["params"], "source", FunctionParamSources); - findAndAddValue(Func["params"], "sink", FunctionParamSinks); - findAndAddValue(Func["params"], "sanitizer", FunctionParamSanitizers); + FunctionData Data = FunctionData(); + + if (Func.contains("name")) { + Data.Name = Func["name"]; + } + + if (Func.contains("ret")) { + Data.ReturnType = Func["ret"]; + } + + if (Func.contains("params") && Func["params"].contains("source")) { + for (const auto &Curr : Func["params"]["source"]) { + Data.SourceValues.push_back(Curr); + } + } + + if (Func.contains("params") && Func["params"].contains("sink")) { + for (const auto &Curr : Func["params"]["sink"]) { + Data.SinkValues.push_back(Curr); + } + } + + if (Func.contains("params") && Func["params"].contains("sanitizer")) { + for (const auto &Curr : Func["params"]["sanitizer"]) { + Data.SanitizerValues.push_back(Curr); + } + } + + Functions.push_back(std::move(Data)); } } // handle variables if (Config.contains("variables")) { for (const auto &Var : Config["variables"]) { - // A variables name should be at the same index in the names array and the - // variables array - Variables.push_back(Var); - - findAndAddValue(Config["variables"], "name", VariableNames); - findAndAddValue(Config["variables"], "scope", Variables); - findAndAddValue(Config["variables"], "line", VariableLines); - findAndAddValue(Config["variables"], "cat", VariableCats); + VariableData Data = VariableData(); + + if (Var.contains("line")) { + Data.Line = Var["line"]; + } + + if (Var.contains("name")) { + Data.Line = Var["name"]; + } + + if (Var.contains("scope")) { + Data.Line = Var["scope"]; + } + + if (Var.contains("cat")) { + Data.Line = Var["cat"]; + } + + Variables.push_back(std::move(Data)); } } } -const std::vector &TaintConfigData::getAllFunctions() const { - return Functions; -} -const std::vector &TaintConfigData::getAllFunctionNames() const { - return FunctionNames; -} -const std::vector &TaintConfigData::getAllFunctionRets() const { - return FunctionRets; -} -const std::vector & -TaintConfigData::getAllFunctionParamsSources() const { - return FunctionParamSources; -} -const std::vector & -TaintConfigData::getAllFunctionParamsSinks() const { - return FunctionParamSinks; -} -const std::vector & -TaintConfigData::getAllFunctionParamsSanitizers() const { - return FunctionParamSanitizers; -} -const std::vector &TaintConfigData::getAllVariables() const { - return Variables; -} -const std::vector &TaintConfigData::getAllVariableScopes() const { - return VariableScopes; -} -const std::vector &TaintConfigData::getAllVariableLines() const { - return VariableLines; -} -const std::vector &TaintConfigData::getAllVariableCats() const { - return VariableCats; -} -const std::vector &TaintConfigData::getAllVariableNames() const { - return VariableNames; -} - } // namespace psr \ No newline at end of file diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 43396faf5..f7d1a827a 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -16,6 +16,7 @@ #include "phasar/PhasarLLVM/Passes/ValueAnnotationPass.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/Utils/DebugOutput.h" #include "phasar/Utils/Utilities.h" @@ -65,7 +66,7 @@ class IDETaintAnalysisTest : public ::testing::Test { }, [&](json *JS) { TaintConfigData Data = TaintConfigData(*JS); - auto Ret = + LLVMTaintConfig Ret = LLVMTaintConfig(HA.getProjectIRDB(), Data); if (DumpResults) { llvm::errs() << Ret << "\n"; From 595d3134d887dcee07763c3e0d7a4528d2438bd9 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 18 Aug 2023 09:42:09 +0200 Subject: [PATCH 13/26] compiling version, tests fail --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 2 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 4 +++ .../TaintConfig/LLVMTaintConfig.cpp | 1 - .../TaintConfig/TaintConfigData.cpp | 32 +++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index 3b403e332..a7425be6e 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -20,7 +20,7 @@ namespace psr { class LLVMTaintConfig; class LLVMProjectIRDB; -class TaintConfigData; +struct TaintConfigData; template <> struct TaintConfigTraits { using n_t = const llvm::Instruction *; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 85cf0131b..22d634583 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -42,6 +42,10 @@ struct TaintConfigData { std::vector Functions; std::vector Variables; + + [[nodiscard]] std::vector getAllFunctionNames() const; + [[nodiscard]] std::vector getAllVariableLines() const; + [[nodiscard]] std::vector getAllVariableCats() const; }; } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index cce410357..7946e91dc 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -143,7 +143,6 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, // add corresponding Allocas or getElementPtr instructions to the taint // category - const auto &ConfigFunctions = Config.getAllFunctions(); const auto &ConfigFunctionNames = Config.getAllFunctionNames(); const auto &ConfigVariableLine = Config.getAllVariableLines(); const auto &ConfigVariableCat = Config.getAllVariableCats(); diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 1178b65a5..8dc3b5270 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -158,4 +158,36 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } +std::vector TaintConfigData::getAllFunctionNames() const { + std::vector FunctionNames; + FunctionNames.reserve(Functions.size()); + + for (const auto &Func : Functions) { + FunctionNames.push_back(Func.Name); + } + + return FunctionNames; +} + +std::vector TaintConfigData::getAllVariableLines() const { + std::vector VariableLines; + VariableLines.reserve(Variables.size()); + + for (const auto &Var : Variables) { + VariableLines.push_back(Var.Name); + } + + return VariableLines; +} +std::vector TaintConfigData::getAllVariableCats() const { + std::vector VariableCats; + VariableCats.reserve(Variables.size()); + + for (const auto &Var : Variables) { + VariableCats.push_back(Var.Name); + } + + return VariableCats; +} + } // namespace psr \ No newline at end of file From 986eabb96e77022d3adc87f623b36034c1e9ea88 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 24 Aug 2023 11:57:27 +0200 Subject: [PATCH 14/26] only 3 unittests fail now --- .../Problems/IDEExtendedTaintAnalysis.h | 6 +- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 3 +- .../TaintConfig/LLVMTaintConfig.cpp | 62 ++++---- .../TaintConfig/TaintConfigBase.cpp | 7 + .../TaintConfig/TaintConfigData.cpp | 144 +++++++++++------- tools/example-tool/myphasartool.cpp | 67 ++++---- .../Mono/InterMonoTaintAnalysisTest.cpp | 6 +- .../TaintConfig/TaintConfigTest.cpp | 51 +++++-- 8 files changed, 192 insertions(+), 154 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index 8a9979b52..c05815101 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -7,8 +7,8 @@ * Fabian Schiebel and others *****************************************************************************/ -#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H -#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H +#ifndef PHASAR_PHASARLLVM_DATAFLOWSOLVER_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H +#define PHASAR_PHASARLLVM_DATAFLOWSOLVER_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/Domain/LatticeDomain.h" @@ -354,4 +354,4 @@ class IDEExtendedTaintAnalysis : public XTaint::IDEExtendedTaintAnalysis { } // namespace psr -#endif +#endif \ No newline at end of file diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index a7425be6e..9fda92774 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -15,12 +15,11 @@ #include "llvm/IR/Instruction.h" #include -#include namespace psr { class LLVMTaintConfig; class LLVMProjectIRDB; -struct TaintConfigData; +class TaintConfigData; template <> struct TaintConfigTraits { using n_t = const llvm::Instruction *; diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 7946e91dc..b63ba9229 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -88,7 +88,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, } for (const auto &Idx : FunDesc.SinkValues) { if (Idx >= Fun->arg_size()) { - llvm::errs() << "ERROR: The source-function parameter index is out of " + llvm::errs() << "ERROR: The sink-function parameter index is out of " "bounds: " << Idx << "\n"; continue; @@ -98,9 +98,10 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, for (const auto &Idx : FunDesc.SanitizerValues) { if (Idx >= Fun->arg_size()) { - llvm::errs() << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; + llvm::errs() + << "ERROR: The sanitizer-function parameter index is out of " + "bounds: " + << Idx << "\n"; continue; } addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); @@ -140,37 +141,32 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, } DIF.reset(); } - // add corresponding Allocas or getElementPtr instructions to the taint // category - const auto &ConfigFunctionNames = Config.getAllFunctionNames(); - const auto &ConfigVariableLine = Config.getAllVariableLines(); - const auto &ConfigVariableCat = Config.getAllVariableCats(); - int Iter = -1; - for (const auto &Fun : Code.getAllFunctions()) { - Iter++; - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().str() == ConfigFunctionNames.at(Iter) && - (std::to_string(LocalVar->getLine()) == - ConfigVariableLine.at(Iter))) { - addTaintCategory(DbgDeclare->getAddress(), - ConfigVariableCat.at(Iter)); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarDesc.size()).equals(VarDesc)) { - addTaintCategory(Gep, VarDesc); + for (const auto &VarDesc : Config.Variables) { + for (const auto &Fun : Code.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().equals(VarDesc.Name) && + LocalVar->getLine() == VarDesc.Line) { + addTaintCategory(DbgDeclare->getAddress(), VarDesc.Cat); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + // const auto VarDesc = StructConfigMap.at(StType); + auto VarName = VarDesc.Name; + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { + addTaintCategory(Gep, VarDesc.Cat); + } } } } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 01e462415..d33b89c66 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -9,6 +9,8 @@ #include +#include + llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { switch (Cat) { case TaintCategory::Source: @@ -38,3 +40,8 @@ psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { } return std::move(*Ret); } + +std::optional +psr::parseTaintConfigOrNull(const llvm::Twine &Path) { + return TaintConfigData(Path.str()); +} diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 8dc3b5270..63478b73f 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -16,12 +16,19 @@ #include +#include + namespace psr { -std::optional -parseTaintConfigOrNull(const llvm::Twine &Path) { - std::optional TaintConfig = readJsonFile(Path); +TaintConfigData::TaintConfigData(const std::string &Filepath) { + llvm::outs() << "Constructor 0\n"; + llvm::outs().flush(); + std::optional TaintConfig = readJsonFile(Filepath); + llvm::outs() << "Constructor 1\n"; + llvm::outs().flush(); nlohmann::json_schema::json_validator Validator; + llvm::outs() << "Constructor 2\n"; + llvm::outs().flush(); try { static const nlohmann::json TaintConfigSchema = #include "../config/TaintConfigSchema.json" @@ -31,8 +38,10 @@ parseTaintConfigOrNull(const llvm::Twine &Path) { } catch (const std::exception &E) { PHASAR_LOG_LEVEL(ERROR, "Validation of schema failed, here is why: " << E.what()); - return std::nullopt; + return; } + llvm::outs() << "Constructor 3\n"; + llvm::outs().flush(); // a custom error handler class CustomJsonErrorHandler @@ -46,89 +55,91 @@ parseTaintConfigOrNull(const llvm::Twine &Path) { << "' - '" << Instance << "': " << Message); } }; + + llvm::outs() << "Constructor 4\n"; + llvm::outs().flush(); CustomJsonErrorHandler Err; Validator.validate(*TaintConfig, Err); + llvm::outs() << "Constructor 5\n"; + llvm::outs().flush(); if (Err) { + llvm::outs() << "[TaintConfigData::TaintConfigData()]: if (Err) {\n"; + llvm::outs().flush(); TaintConfig.reset(); + return; } - return std::optional(Path.str()); -} - -void findAndAddValue(const nlohmann::json &Config, const std::string &Value, - std::vector &Container) { - if (Config.contains(Value)) { - for (const auto &Curr : Config[Value]) { - Container.push_back(Curr); - } - } -} - -void addAllFunctions(const nlohmann::json &Config, - std::vector &Container) { - findAndAddValue(Config, "functions", Container); -} - -void addAllFunctionNames(const nlohmann::json &Function, - std::vector &Container) { - findAndAddValue(Function, "name", Container); -} - -void addAllFunctionRets(const nlohmann::json &Function, - std::vector &Container) { - findAndAddValue(Function, "ret", Container); -} - -void addAllFunctionParamsSources(const nlohmann::json &Param, - std::vector &Container) { - findAndAddValue(Param, "source", Container); -} - -void addAllFunctionParamsSinks(const nlohmann::json &Param, - std::vector &Container) { - findAndAddValue(Param, "sink", Container); -} - -void addAllFunctionParamsSanitizers(const nlohmann::json &Param, - std::vector &Container) { - findAndAddValue(Param, "sanitizer", Container); -} -TaintConfigData::TaintConfigData(const std::string &Filepath) { + llvm::outs() << "Constructor 6\n"; + llvm::outs().flush(); + if (!TaintConfig) { + llvm::outs() + << "[TaintConfigData::TaintConfigData()]: TaintConfigData is null"; + llvm::outs().flush(); + return; + }; - nlohmann::json Config(Filepath); + llvm::outs() << "Constructor 7\n"; + llvm::outs().flush(); + nlohmann::json Config = *TaintConfig; + // llvm::outs() << Config; + // llvm::outs().flush(); + llvm::outs() << "Constructor 8\n"; + llvm::outs().flush(); // handle functions if (Config.contains("functions")) { for (const auto &Func : Config["functions"]) { FunctionData Data = FunctionData(); + bool FuncPushBackFlag = false; if (Func.contains("name")) { + llvm::outs() << "[TaintConfigData::TaintConfigData()]: name test\n"; + llvm::outs().flush(); Data.Name = Func["name"]; + FuncPushBackFlag = true; } if (Func.contains("ret")) { + llvm::outs() << "[TaintConfigData::TaintConfigData()]: ret test\n"; + llvm::outs().flush(); Data.ReturnType = Func["ret"]; + FuncPushBackFlag = true; } if (Func.contains("params") && Func["params"].contains("source")) { for (const auto &Curr : Func["params"]["source"]) { - Data.SourceValues.push_back(Curr); + llvm::outs() << "[TaintConfigData::TaintConfigData()]: source test: " + << Curr.get() << "\n"; + llvm::outs().flush(); + Data.SourceValues.push_back(Curr.get()); } + FuncPushBackFlag = true; } if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { - Data.SinkValues.push_back(Curr); + Data.SinkValues.push_back(Curr.get()); + llvm::outs() << "[TaintConfigData::TaintConfigData()]: sink test" + << Curr.get() << "\n"; + llvm::outs().flush(); } + FuncPushBackFlag = true; } if (Func.contains("params") && Func["params"].contains("sanitizer")) { for (const auto &Curr : Func["params"]["sanitizer"]) { - Data.SanitizerValues.push_back(Curr); + llvm::outs() + << "[TaintConfigData::TaintConfigData()]: sanitizer test: " + << Curr.get() << "\n"; + llvm::outs().flush(); + Data.SanitizerValues.push_back(Curr.get()); } + FuncPushBackFlag = true; } - Functions.push_back(std::move(Data)); + if (FuncPushBackFlag) { + Functions.push_back(std::move(Data)); + } } } @@ -136,26 +147,45 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { if (Config.contains("variables")) { for (const auto &Var : Config["variables"]) { VariableData Data = VariableData(); + bool VarPushBackFlag = false; if (Var.contains("line")) { - Data.Line = Var["line"]; + Data.Line = Var["line"].get(); + VarPushBackFlag = true; + llvm::outs() << "line test: " << Var["line"].get() << "\n"; + llvm::outs().flush(); } if (Var.contains("name")) { - Data.Line = Var["name"]; + Data.Name = Var["name"].get(); + VarPushBackFlag = true; + llvm::outs() << "name test: " << Var.contains("name") << "\n"; + llvm::outs().flush(); } if (Var.contains("scope")) { - Data.Line = Var["scope"]; + Data.Scope = Var["scope"].get(); + VarPushBackFlag = true; + llvm::outs() << "scope test: " << Var["scope"].get() + << "\n"; + llvm::outs().flush(); } if (Var.contains("cat")) { - Data.Line = Var["cat"]; + Data.Cat = Var["cat"].get(); + VarPushBackFlag = true; + llvm::outs() << "cat test: " << Var["cat"].get() << "\n"; + llvm::outs().flush(); + } + if (VarPushBackFlag) { + Variables.push_back(std::move(Data)); } - - Variables.push_back(std::move(Data)); } } + + llvm::outs() << "Funcsize: " << Functions.size() + << " - Varsize: " << Variables.size() << "\n"; + llvm::outs().flush(); } std::vector TaintConfigData::getAllFunctionNames() const { diff --git a/tools/example-tool/myphasartool.cpp b/tools/example-tool/myphasartool.cpp index 642c6fd97..f87ee75b3 100644 --- a/tools/example-tool/myphasartool.cpp +++ b/tools/example-tool/myphasartool.cpp @@ -15,44 +15,31 @@ using namespace psr; int main(int Argc, const char **Argv) { - using namespace std::string_literals; - - if (Argc < 2 || !std::filesystem::exists(Argv[1]) || - std::filesystem::is_directory(Argv[1])) { - llvm::errs() << "myphasartool\n" - "A small PhASAR-based example program\n\n" - "Usage: myphasartool \n"; - return 1; - } - - std::vector EntryPoints = {"main"s}; - - HelperAnalyses HA(Argv[1], EntryPoints); - - if (const auto *F = HA.getProjectIRDB().getFunctionDefinition("main")) { - // print type hierarchy - HA.getTypeHierarchy().print(); - // print points-to information - HA.getAliasInfo().print(); - // print inter-procedural control-flow graph - HA.getICFG().print(); - - // IFDS template parametrization test - llvm::outs() << "Testing IFDS:\n"; - auto L = createAnalysisProblem(HA, EntryPoints); - IFDSSolver S(L, &HA.getICFG()); - auto IFDSResults = S.solve(); - IFDSResults.dumpResults(HA.getICFG(), L); - - // IDE template parametrization test - llvm::outs() << "Testing IDE:\n"; - auto M = createAnalysisProblem(HA, EntryPoints); - // Alternative way of solving an IFDS/IDEProblem: - auto IDEResults = solveIDEProblem(M, HA.getICFG()); - IDEResults.dumpResults(HA.getICFG(), M); - - } else { - llvm::errs() << "error: file does not contain a 'main' function!\n"; - } - return 0; + std::string File = + "/home/max/Desktop/Arbeit/phasar-f-TaintConfigSer/phasar/build/test/" + "llvm_test_code/TaintConfig/JsonConfig/array_01_c_dbg.ll"; + std::string Config = + "/home/max/Desktop/Arbeit/phasar-f-TaintConfigSer/phasar/build/test/" + "llvm_test_code/TaintConfig/JsonConfig/array_01_config.json"; + llvm::outs() << "Test 0\n"; + llvm::outs().flush(); + llvm::outs() << Config << "\n"; + llvm::outs() << File << "\n"; + llvm::outs().flush(); + auto JsonConfig = psr::TaintConfigData({Config}); + llvm::outs() << "Test 1\n"; + llvm::outs().flush(); + + psr::LLVMProjectIRDB IR({File}); + llvm::outs() << "Test 2\n"; + llvm::outs().flush(); + // IR.emitPreprocessedIR(llvm::outs(), false); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); + llvm::outs() << "Test 3\n"; + llvm::outs().flush(); + llvm::outs() << TConfig << '\n'; + const llvm::Value *I = IR.getInstruction(3); + // ASSERT_TRUE(TConfig.isSource(I)); + llvm::outs() << "Test 4\n"; + llvm::outs().flush(); } diff --git a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp index 94ce9b1fb..b163c38f8 100644 --- a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp @@ -7,7 +7,6 @@ #include "phasar/PhasarLLVM/Passes/ValueAnnotationPass.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" -#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" @@ -35,8 +34,7 @@ class InterMonoTaintAnalysisTest : public ::testing::Test { auto ConfigPath = (PathToLlFiles + "config.json").str(); auto BuildPos = ConfigPath.rfind("/build/") + 1; ConfigPath.erase(BuildPos, 6); - TaintConfigData Data = TaintConfigData(parseTaintConfig(ConfigPath)); - LLVMTaintConfig TC(HA.getProjectIRDB(), Data); + LLVMTaintConfig TC(HA.getProjectIRDB(), parseTaintConfig(ConfigPath)); TC.registerSinkCallBack([](const llvm::Instruction *Inst) { std::set Ret; if (const auto *Call = llvm::dyn_cast(Inst); @@ -393,4 +391,4 @@ TEST(InterMonoTaintAnalysisTestNF, TaintTest_05) { int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS(); -} +} \ No newline at end of file diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 8caadf83f..1da934b11 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -40,7 +40,6 @@ TEST_F(TaintConfigTest, Array_01) { const std::string File = "array_01_c_dbg.ll"; psr::LLVMProjectIRDB IR({PathToAttrTaintConfigTestCode + File}); psr::LLVMTaintConfig Config(IR); - llvm::outs() << Config << '\n'; const llvm::Value *I = IR.getInstruction(5); ASSERT_TRUE(Config.isSource(I)); } @@ -229,14 +228,28 @@ static constexpr auto PathToJsonTaintConfigTestCode = TEST_F(TaintConfigTest, Array_01_Json) { const std::string File = "array_01_c_dbg.ll"; const std::string Config = "array_01_config.json"; + llvm::outs() << "Test 0\n"; + llvm::outs().flush(); + llvm::outs() << PathToJsonTaintConfigTestCode.str() + Config << "\n"; + llvm::outs() << PathToJsonTaintConfigTestCode.str() + File << "\n"; + llvm::outs().flush(); auto JsonConfig = - psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); + psr::TaintConfigData({PathToJsonTaintConfigTestCode.str() + Config}); + llvm::outs() << "Test 1\n"; + llvm::outs().flush(); + psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); + llvm::outs() << "Test 2\n"; + llvm::outs().flush(); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); + llvm::outs() << "Test 3\n"; + llvm::outs().flush(); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); + llvm::outs() << "Test 4\n"; + llvm::outs().flush(); } TEST_F(TaintConfigTest, Array_02_Json) { @@ -246,7 +259,7 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); @@ -258,7 +271,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const auto *Bar = IR.getFunction("bar"); assert(Bar); @@ -282,10 +295,18 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + llvm::outs() << "Test 1\n"; + llvm::outs().flush(); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); + llvm::outs() << "Test 2\n"; + llvm::outs().flush(); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); + llvm::outs() << "Test 3\n"; + llvm::outs().flush(); const llvm::Value *I2 = IR.getInstruction(18); + llvm::outs() << "Test 4\n"; + llvm::outs().flush(); ASSERT_TRUE(TConfig.isSource(I1)); ASSERT_TRUE(TConfig.isSource(I2)); } @@ -296,7 +317,7 @@ TEST_F(TaintConfigTest, Basic_03_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const auto *TaintPair = IR.getFunction("taintPair"); assert(TaintPair); @@ -314,7 +335,7 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); @@ -327,7 +348,7 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); @@ -342,7 +363,7 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { @@ -366,7 +387,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); @@ -398,7 +419,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; llvm::outs().flush(); for (const auto *F : IR.getAllFunctions()) { @@ -420,7 +441,7 @@ TEST_F(TaintConfigTest, StaticFun_01_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); @@ -442,7 +463,7 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - psr::LLVMTaintConfig TConfig(IR, psr::TaintConfigData(JsonConfig)); + psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; const llvm::Value *CallInst = IR.getInstruction(13); const auto *I = llvm::dyn_cast(CallInst); @@ -468,4 +489,4 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS(); -} +} \ No newline at end of file From 3a2909aca30607da7ce88ae729013c622e4ca6ff Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 24 Aug 2023 17:03:20 +0200 Subject: [PATCH 15/26] fixed a bug with sink values causing a crash --- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 1 + .../TaintConfig/LLVMTaintConfig.cpp | 11 ++- .../TaintConfig/TaintConfigData.cpp | 84 +++++++------------ 3 files changed, 40 insertions(+), 56 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 22d634583..709552a71 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -24,6 +24,7 @@ struct FunctionData { std::string ReturnType; std::vector SourceValues; std::vector SinkValues; + std::vector SinkStringValues; std::vector SanitizerValues; }; diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index b63ba9229..fa2ab563a 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -96,6 +96,14 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } + for (const auto &Idx : FunDesc.SinkStringValues) { + if (Idx == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); + } + } + } + for (const auto &Idx : FunDesc.SanitizerValues) { if (Idx >= Fun->arg_size()) { llvm::errs() @@ -159,8 +167,7 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, const auto *StType = llvm::dyn_cast( Gep->getPointerOperandType()->getPointerElementType()); if (StType && StructConfigMap.count(StType)) { - // const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc.Name; + auto VarName = StructConfigMap.at(StType); // using substr to cover the edge case in which same variable // name is present as a local variable and also as a struct // member variable. (Ex. JsonConfig/fun_member_02.cpp) diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 63478b73f..0023d5706 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -21,14 +21,8 @@ namespace psr { TaintConfigData::TaintConfigData(const std::string &Filepath) { - llvm::outs() << "Constructor 0\n"; - llvm::outs().flush(); std::optional TaintConfig = readJsonFile(Filepath); - llvm::outs() << "Constructor 1\n"; - llvm::outs().flush(); nlohmann::json_schema::json_validator Validator; - llvm::outs() << "Constructor 2\n"; - llvm::outs().flush(); try { static const nlohmann::json TaintConfigSchema = #include "../config/TaintConfigSchema.json" @@ -40,8 +34,6 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { "Validation of schema failed, here is why: " << E.what()); return; } - llvm::outs() << "Constructor 3\n"; - llvm::outs().flush(); // a custom error handler class CustomJsonErrorHandler @@ -56,36 +48,22 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } }; - llvm::outs() << "Constructor 4\n"; - llvm::outs().flush(); CustomJsonErrorHandler Err; Validator.validate(*TaintConfig, Err); - llvm::outs() << "Constructor 5\n"; - llvm::outs().flush(); if (Err) { - llvm::outs() << "[TaintConfigData::TaintConfigData()]: if (Err) {\n"; - llvm::outs().flush(); TaintConfig.reset(); return; } - llvm::outs() << "Constructor 6\n"; - llvm::outs().flush(); if (!TaintConfig) { llvm::outs() - << "[TaintConfigData::TaintConfigData()]: TaintConfigData is null"; + << "[TaintConfigData::TaintConfigData()]: TaintConfigData is null!"; llvm::outs().flush(); return; }; - llvm::outs() << "Constructor 7\n"; - llvm::outs().flush(); nlohmann::json Config = *TaintConfig; - // llvm::outs() << Config; - // llvm::outs().flush(); - llvm::outs() << "Constructor 8\n"; - llvm::outs().flush(); // handle functions if (Config.contains("functions")) { for (const auto &Func : Config["functions"]) { @@ -93,50 +71,62 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { bool FuncPushBackFlag = false; if (Func.contains("name")) { - llvm::outs() << "[TaintConfigData::TaintConfigData()]: name test\n"; - llvm::outs().flush(); - Data.Name = Func["name"]; + Data.Name = Func["name"].get(); FuncPushBackFlag = true; } if (Func.contains("ret")) { - llvm::outs() << "[TaintConfigData::TaintConfigData()]: ret test\n"; - llvm::outs().flush(); Data.ReturnType = Func["ret"]; FuncPushBackFlag = true; } if (Func.contains("params") && Func["params"].contains("source")) { for (const auto &Curr : Func["params"]["source"]) { - llvm::outs() << "[TaintConfigData::TaintConfigData()]: source test: " - << Curr.get() << "\n"; - llvm::outs().flush(); Data.SourceValues.push_back(Curr.get()); } FuncPushBackFlag = true; } + /*if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); + } + } + } + } + }*/ + if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { - Data.SinkValues.push_back(Curr.get()); - llvm::outs() << "[TaintConfigData::TaintConfigData()]: sink test" - << Curr.get() << "\n"; - llvm::outs().flush(); + if (Curr.is_string()) { + Data.SinkStringValues.push_back(Curr.get()); + } else { + Data.SinkValues.push_back(Curr.get()); + } } FuncPushBackFlag = true; } if (Func.contains("params") && Func["params"].contains("sanitizer")) { for (const auto &Curr : Func["params"]["sanitizer"]) { - llvm::outs() - << "[TaintConfigData::TaintConfigData()]: sanitizer test: " - << Curr.get() << "\n"; - llvm::outs().flush(); + Data.SanitizerValues.push_back(Curr.get()); } FuncPushBackFlag = true; } - if (FuncPushBackFlag) { Functions.push_back(std::move(Data)); } @@ -148,44 +138,30 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { for (const auto &Var : Config["variables"]) { VariableData Data = VariableData(); bool VarPushBackFlag = false; - if (Var.contains("line")) { Data.Line = Var["line"].get(); VarPushBackFlag = true; - llvm::outs() << "line test: " << Var["line"].get() << "\n"; - llvm::outs().flush(); } if (Var.contains("name")) { Data.Name = Var["name"].get(); VarPushBackFlag = true; - llvm::outs() << "name test: " << Var.contains("name") << "\n"; - llvm::outs().flush(); } if (Var.contains("scope")) { Data.Scope = Var["scope"].get(); VarPushBackFlag = true; - llvm::outs() << "scope test: " << Var["scope"].get() - << "\n"; - llvm::outs().flush(); } if (Var.contains("cat")) { Data.Cat = Var["cat"].get(); VarPushBackFlag = true; - llvm::outs() << "cat test: " << Var["cat"].get() << "\n"; - llvm::outs().flush(); } if (VarPushBackFlag) { Variables.push_back(std::move(Data)); } } } - - llvm::outs() << "Funcsize: " << Functions.size() - << " - Varsize: " << Variables.size() << "\n"; - llvm::outs().flush(); } std::vector TaintConfigData::getAllFunctionNames() const { From 0c831701c5a0007c37a6e9b46f19464dfa330273 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 25 Aug 2023 10:59:19 +0200 Subject: [PATCH 16/26] all unittests pass --- .../TaintConfig/LLVMTaintConfig.cpp | 5 ++-- .../TaintConfig/TaintConfigData.cpp | 30 ++----------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index fa2ab563a..311c55a01 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -60,9 +60,7 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const TaintConfigData &Config) { - int Counter = -1; for (const auto &FunDesc : Config.Functions) { - Counter++; auto Name = FunDesc.Name; auto FnDefs = findAllFunctionDefs(IRDB, Name); @@ -84,6 +82,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, // remaining parameters as well continue; } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); } for (const auto &Idx : FunDesc.SinkValues) { @@ -139,7 +138,7 @@ LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, DIF.processModule(*M); for (const auto &Ty : DIF.types()) { if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc.Name)) { + Ty->getName().equals(VarDesc.Scope)) { for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { StructConfigMap.insert( std::pair(LlvmStructTy, diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 0023d5706..9292ecaf6 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -14,10 +14,6 @@ #include "nlohmann/json-schema.hpp" #include "nlohmann/json.hpp" -#include - -#include - namespace psr { TaintConfigData::TaintConfigData(const std::string &Filepath) { @@ -76,7 +72,7 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } if (Func.contains("ret")) { - Data.ReturnType = Func["ret"]; + Data.ReturnType = Func["ret"].get(); FuncPushBackFlag = true; } @@ -87,28 +83,6 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { FuncPushBackFlag = true; } - /*if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } - } - } - } - }*/ - if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { if (Curr.is_string()) { @@ -122,11 +96,11 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { if (Func.contains("params") && Func["params"].contains("sanitizer")) { for (const auto &Curr : Func["params"]["sanitizer"]) { - Data.SanitizerValues.push_back(Curr.get()); } FuncPushBackFlag = true; } + if (FuncPushBackFlag) { Functions.push_back(std::move(Data)); } From 7cba2808c1e6b2ea01aae4279d7cc56aab0418cd Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 4 Sep 2023 07:35:10 +0200 Subject: [PATCH 17/26] review fixes --- .../Problems/IDEExtendedTaintAnalysis.h | 4 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 7 +-- .../TaintConfig/LLVMTaintConfig.cpp | 10 ++--- .../TaintConfig/TaintConfigBase.cpp | 8 ++-- .../TaintConfig/TaintConfigData.cpp | 44 ++++--------------- tools/example-tool/myphasartool.cpp | 31 +------------ .../Mono/InterMonoTaintAnalysisTest.cpp | 2 +- 7 files changed, 22 insertions(+), 84 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index c05815101..a0778fbbc 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -7,8 +7,8 @@ * Fabian Schiebel and others *****************************************************************************/ -#ifndef PHASAR_PHASARLLVM_DATAFLOWSOLVER_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H -#define PHASAR_PHASARLLVM_DATAFLOWSOLVER_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEEXTENDEDTAINTANALYSIS_H #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/Domain/LatticeDomain.h" diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 709552a71..622f88eb9 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -24,8 +24,8 @@ struct FunctionData { std::string ReturnType; std::vector SourceValues; std::vector SinkValues; - std::vector SinkStringValues; std::vector SanitizerValues; + bool HasAllSinkParam = false; }; struct VariableData { @@ -38,15 +38,10 @@ struct VariableData { }; struct TaintConfigData { - TaintConfigData() = default; explicit TaintConfigData(const std::string &Filepath); std::vector Functions; std::vector Variables; - - [[nodiscard]] std::vector getAllFunctionNames() const; - [[nodiscard]] std::vector getAllVariableLines() const; - [[nodiscard]] std::vector getAllVariableCats() const; }; } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 311c55a01..d37f016e4 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -61,7 +61,7 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, const TaintConfigData &Config) { for (const auto &FunDesc : Config.Functions) { - auto Name = FunDesc.Name; + const auto &Name = FunDesc.Name; auto FnDefs = findAllFunctionDefs(IRDB, Name); @@ -95,11 +95,9 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } - for (const auto &Idx : FunDesc.SinkStringValues) { - if (Idx == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, TaintCategory::Sink); - } + if (FunDesc.HasAllSinkParam) { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); } } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index d33b89c66..ac650c864 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -7,9 +7,9 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include +#include "nlohmann/json-schema.hpp" -#include +#include llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { switch (Cat) { @@ -36,7 +36,9 @@ psr::TaintCategory psr::toTaintCategory(llvm::StringRef Str) noexcept { psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { auto Ret = parseTaintConfigOrNull(Path); if (!Ret) { - return {}; + /*TODO: assertion oder error thrown*/ + llvm::errs() << "ERROR: TaintConfigData is null\n"; + abort(); } return std::move(*Ret); } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 9292ecaf6..c3205114c 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -85,10 +85,14 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { - if (Curr.is_string()) { - Data.SinkStringValues.push_back(Curr.get()); - } else { + if (Curr.get()) { Data.SinkValues.push_back(Curr.get()); + } else if (Curr.is_string() && Curr.get() == "all") { + Data.HasAllSinkParam = true; + } else { + llvm::outs() << "[TaintConfigData::TaintConfigData()]: " + "Unknown sink string parameter!"; + llvm::outs().flush(); } } FuncPushBackFlag = true; @@ -138,36 +142,4 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } -std::vector TaintConfigData::getAllFunctionNames() const { - std::vector FunctionNames; - FunctionNames.reserve(Functions.size()); - - for (const auto &Func : Functions) { - FunctionNames.push_back(Func.Name); - } - - return FunctionNames; -} - -std::vector TaintConfigData::getAllVariableLines() const { - std::vector VariableLines; - VariableLines.reserve(Variables.size()); - - for (const auto &Var : Variables) { - VariableLines.push_back(Var.Name); - } - - return VariableLines; -} -std::vector TaintConfigData::getAllVariableCats() const { - std::vector VariableCats; - VariableCats.reserve(Variables.size()); - - for (const auto &Var : Variables) { - VariableCats.push_back(Var.Name); - } - - return VariableCats; -} - -} // namespace psr \ No newline at end of file +} // namespace psr diff --git a/tools/example-tool/myphasartool.cpp b/tools/example-tool/myphasartool.cpp index f87ee75b3..92e5fc50e 100644 --- a/tools/example-tool/myphasartool.cpp +++ b/tools/example-tool/myphasartool.cpp @@ -9,37 +9,8 @@ #include "phasar.h" -#include #include using namespace psr; -int main(int Argc, const char **Argv) { - std::string File = - "/home/max/Desktop/Arbeit/phasar-f-TaintConfigSer/phasar/build/test/" - "llvm_test_code/TaintConfig/JsonConfig/array_01_c_dbg.ll"; - std::string Config = - "/home/max/Desktop/Arbeit/phasar-f-TaintConfigSer/phasar/build/test/" - "llvm_test_code/TaintConfig/JsonConfig/array_01_config.json"; - llvm::outs() << "Test 0\n"; - llvm::outs().flush(); - llvm::outs() << Config << "\n"; - llvm::outs() << File << "\n"; - llvm::outs().flush(); - auto JsonConfig = psr::TaintConfigData({Config}); - llvm::outs() << "Test 1\n"; - llvm::outs().flush(); - - psr::LLVMProjectIRDB IR({File}); - llvm::outs() << "Test 2\n"; - llvm::outs().flush(); - // IR.emitPreprocessedIR(llvm::outs(), false); - psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << "Test 3\n"; - llvm::outs().flush(); - llvm::outs() << TConfig << '\n'; - const llvm::Value *I = IR.getInstruction(3); - // ASSERT_TRUE(TConfig.isSource(I)); - llvm::outs() << "Test 4\n"; - llvm::outs().flush(); -} +int main(int Argc, const char **Argv) {} diff --git a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp index b163c38f8..0ab2c5bc7 100644 --- a/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/Mono/InterMonoTaintAnalysisTest.cpp @@ -391,4 +391,4 @@ TEST(InterMonoTaintAnalysisTestNF, TaintTest_05) { int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} From 36a43ef69691eb6c7fd822b46feb502c4f874e80 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 4 Sep 2023 09:23:40 +0200 Subject: [PATCH 18/26] one faulty unittest remaining --- .../TaintConfig/LLVMTaintConfig.cpp | 11 +++++++++ .../TaintConfig/TaintConfigData.cpp | 24 ++++++++++++++++++- .../TaintConfig/TaintConfigTest.cpp | 11 ++++++--- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index d37f016e4..1fd02e79a 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -92,6 +92,11 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, << Idx << "\n"; continue; } + + llvm::outs() << "\nInLLVMTC\nFun->getArg(Idx): " << Fun->getArg(Idx) + << "\n"; + llvm::outs().flush(); + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } @@ -303,6 +308,12 @@ bool LLVMTaintConfig::isSourceImpl(const llvm::Value *V) const { return SourceValues.count(V); } bool LLVMTaintConfig::isSinkImpl(const llvm::Value *V) const { + llvm::outs() << "V val: " << V << "\n"; + llvm::outs().flush(); + for (const auto &Test : SinkValues) { + llvm::outs() << "Test: " << Test << "\n"; + llvm::outs().flush(); + } return SinkValues.count(V); } bool LLVMTaintConfig::isSanitizerImpl(const llvm::Value *V) const { diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index c3205114c..f5db368fc 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -14,6 +14,8 @@ #include "nlohmann/json-schema.hpp" #include "nlohmann/json.hpp" +#include + namespace psr { TaintConfigData::TaintConfigData(const std::string &Filepath) { @@ -60,22 +62,30 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { nlohmann::json Config = *TaintConfig; + llvm::outs() << "before functions\n"; + llvm::outs().flush(); // handle functions if (Config.contains("functions")) { for (const auto &Func : Config["functions"]) { FunctionData Data = FunctionData(); bool FuncPushBackFlag = false; + llvm::outs() << "name\n"; + llvm::outs().flush(); if (Func.contains("name")) { Data.Name = Func["name"].get(); FuncPushBackFlag = true; } + llvm::outs() << "ret\n"; + llvm::outs().flush(); if (Func.contains("ret")) { Data.ReturnType = Func["ret"].get(); FuncPushBackFlag = true; } + llvm::outs() << "source\n"; + llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("source")) { for (const auto &Curr : Func["params"]["source"]) { Data.SourceValues.push_back(Curr.get()); @@ -83,9 +93,14 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { FuncPushBackFlag = true; } + llvm::outs() << "sink\n"; + llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { - if (Curr.get()) { + if (Curr.is_number()) { + llvm::outs() << "i am number " << std::to_string(Curr.get()) + << "\n"; + llvm::outs().flush(); Data.SinkValues.push_back(Curr.get()); } else if (Curr.is_string() && Curr.get() == "all") { Data.HasAllSinkParam = true; @@ -98,6 +113,8 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { FuncPushBackFlag = true; } + llvm::outs() << "sanitizer\n"; + llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("sanitizer")) { for (const auto &Curr : Func["params"]["sanitizer"]) { Data.SanitizerValues.push_back(Curr.get()); @@ -111,6 +128,8 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } + llvm::outs() << "before variables\n"; + llvm::outs().flush(); // handle variables if (Config.contains("variables")) { for (const auto &Var : Config["variables"]) { @@ -140,6 +159,9 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } } + + llvm::outs() << "all done\n"; + llvm::outs().flush(); } } // namespace psr diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 1da934b11..55bb08c83 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -368,27 +368,31 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { for (const auto &F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { assert(F); - for (const auto &User : F->users()) { + for (const auto *const User : F->users()) { if (llvm::isa(User)) { ASSERT_TRUE(TConfig.isSource(User)); } } } else if (F->getName().contains("bar")) { assert(F); - ASSERT_TRUE(TConfig.isSink(F->getArg(0))); + ASSERT_TRUE(TConfig.isSink(F->getArg(1))); } } } TEST_F(TaintConfigTest, FunMember_02_Json) { + llvm::outs() << "start\n"; + llvm::outs().flush(); + const std::string File = "fun_member_02_cpp_dbg.ll"; const std::string Config = "fun_member_02_config.json"; + auto JsonConfig = psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); + psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(18); const llvm::Value *I2 = IR.getInstruction(54); const llvm::Value *I3 = IR.getInstruction(63); @@ -397,6 +401,7 @@ TEST_F(TaintConfigTest, FunMember_02_Json) { ASSERT_TRUE(TConfig.isSource(I2)); ASSERT_TRUE(TConfig.isSource(I3)); ASSERT_TRUE(TConfig.isSource(I4)); + const auto *DestructorX = IR.getFunction("_ZN1XD2Ev"); assert(DestructorX); for (const auto *Arg = DestructorX->arg_begin(); From 6427805ba1f8eb3503e1487ecc840eaeee321c8d Mon Sep 17 00:00:00 2001 From: mxHuber Date: Tue, 5 Sep 2023 09:22:57 +0200 Subject: [PATCH 19/26] all unittests pass + myphasartool revert --- .../TaintConfig/LLVMTaintConfig.cpp | 10 ----- .../TaintConfig/TaintConfigData.cpp | 26 +---------- tools/example-tool/myphasartool.cpp | 44 ++++++++++++++++++- .../TaintConfig/TaintConfigTest.cpp | 41 +++-------------- 4 files changed, 51 insertions(+), 70 deletions(-) diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 1fd02e79a..411535ffc 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -93,10 +93,6 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, continue; } - llvm::outs() << "\nInLLVMTC\nFun->getArg(Idx): " << Fun->getArg(Idx) - << "\n"; - llvm::outs().flush(); - addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); } @@ -308,12 +304,6 @@ bool LLVMTaintConfig::isSourceImpl(const llvm::Value *V) const { return SourceValues.count(V); } bool LLVMTaintConfig::isSinkImpl(const llvm::Value *V) const { - llvm::outs() << "V val: " << V << "\n"; - llvm::outs().flush(); - for (const auto &Test : SinkValues) { - llvm::outs() << "Test: " << Test << "\n"; - llvm::outs().flush(); - } return SinkValues.count(V); } bool LLVMTaintConfig::isSanitizerImpl(const llvm::Value *V) const { diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index f5db368fc..d90f26ccd 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -54,38 +54,29 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } if (!TaintConfig) { - llvm::outs() + llvm::errs() << "[TaintConfigData::TaintConfigData()]: TaintConfigData is null!"; - llvm::outs().flush(); return; }; nlohmann::json Config = *TaintConfig; - llvm::outs() << "before functions\n"; - llvm::outs().flush(); // handle functions if (Config.contains("functions")) { for (const auto &Func : Config["functions"]) { FunctionData Data = FunctionData(); bool FuncPushBackFlag = false; - llvm::outs() << "name\n"; - llvm::outs().flush(); if (Func.contains("name")) { Data.Name = Func["name"].get(); FuncPushBackFlag = true; } - llvm::outs() << "ret\n"; - llvm::outs().flush(); if (Func.contains("ret")) { Data.ReturnType = Func["ret"].get(); FuncPushBackFlag = true; } - llvm::outs() << "source\n"; - llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("source")) { for (const auto &Curr : Func["params"]["source"]) { Data.SourceValues.push_back(Curr.get()); @@ -93,28 +84,20 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { FuncPushBackFlag = true; } - llvm::outs() << "sink\n"; - llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("sink")) { for (const auto &Curr : Func["params"]["sink"]) { if (Curr.is_number()) { - llvm::outs() << "i am number " << std::to_string(Curr.get()) - << "\n"; - llvm::outs().flush(); Data.SinkValues.push_back(Curr.get()); } else if (Curr.is_string() && Curr.get() == "all") { Data.HasAllSinkParam = true; } else { - llvm::outs() << "[TaintConfigData::TaintConfigData()]: " + llvm::errs() << "[TaintConfigData::TaintConfigData()]: " "Unknown sink string parameter!"; - llvm::outs().flush(); } } FuncPushBackFlag = true; } - llvm::outs() << "sanitizer\n"; - llvm::outs().flush(); if (Func.contains("params") && Func["params"].contains("sanitizer")) { for (const auto &Curr : Func["params"]["sanitizer"]) { Data.SanitizerValues.push_back(Curr.get()); @@ -128,8 +111,6 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } - llvm::outs() << "before variables\n"; - llvm::outs().flush(); // handle variables if (Config.contains("variables")) { for (const auto &Var : Config["variables"]) { @@ -159,9 +140,6 @@ TaintConfigData::TaintConfigData(const std::string &Filepath) { } } } - - llvm::outs() << "all done\n"; - llvm::outs().flush(); } } // namespace psr diff --git a/tools/example-tool/myphasartool.cpp b/tools/example-tool/myphasartool.cpp index 92e5fc50e..642c6fd97 100644 --- a/tools/example-tool/myphasartool.cpp +++ b/tools/example-tool/myphasartool.cpp @@ -9,8 +9,50 @@ #include "phasar.h" +#include #include using namespace psr; -int main(int Argc, const char **Argv) {} +int main(int Argc, const char **Argv) { + using namespace std::string_literals; + + if (Argc < 2 || !std::filesystem::exists(Argv[1]) || + std::filesystem::is_directory(Argv[1])) { + llvm::errs() << "myphasartool\n" + "A small PhASAR-based example program\n\n" + "Usage: myphasartool \n"; + return 1; + } + + std::vector EntryPoints = {"main"s}; + + HelperAnalyses HA(Argv[1], EntryPoints); + + if (const auto *F = HA.getProjectIRDB().getFunctionDefinition("main")) { + // print type hierarchy + HA.getTypeHierarchy().print(); + // print points-to information + HA.getAliasInfo().print(); + // print inter-procedural control-flow graph + HA.getICFG().print(); + + // IFDS template parametrization test + llvm::outs() << "Testing IFDS:\n"; + auto L = createAnalysisProblem(HA, EntryPoints); + IFDSSolver S(L, &HA.getICFG()); + auto IFDSResults = S.solve(); + IFDSResults.dumpResults(HA.getICFG(), L); + + // IDE template parametrization test + llvm::outs() << "Testing IDE:\n"; + auto M = createAnalysisProblem(HA, EntryPoints); + // Alternative way of solving an IFDS/IDEProblem: + auto IDEResults = solveIDEProblem(M, HA.getICFG()); + IDEResults.dumpResults(HA.getICFG(), M); + + } else { + llvm::errs() << "error: file does not contain a 'main' function!\n"; + } + return 0; +} diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 55bb08c83..f42649f45 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -228,28 +228,17 @@ static constexpr auto PathToJsonTaintConfigTestCode = TEST_F(TaintConfigTest, Array_01_Json) { const std::string File = "array_01_c_dbg.ll"; const std::string Config = "array_01_config.json"; - llvm::outs() << "Test 0\n"; - llvm::outs().flush(); - llvm::outs() << PathToJsonTaintConfigTestCode.str() + Config << "\n"; - llvm::outs() << PathToJsonTaintConfigTestCode.str() + File << "\n"; - llvm::outs().flush(); + auto JsonConfig = psr::TaintConfigData({PathToJsonTaintConfigTestCode.str() + Config}); - llvm::outs() << "Test 1\n"; - llvm::outs().flush(); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); - llvm::outs() << "Test 2\n"; - llvm::outs().flush(); + // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << "Test 3\n"; - llvm::outs().flush(); - llvm::outs() << TConfig << '\n'; + ; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); - llvm::outs() << "Test 4\n"; - llvm::outs().flush(); } TEST_F(TaintConfigTest, Array_02_Json) { @@ -260,7 +249,6 @@ TEST_F(TaintConfigTest, Array_02_Json) { psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); } @@ -272,7 +260,7 @@ TEST_F(TaintConfigTest, Basic_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; + const auto *Bar = IR.getFunction("bar"); assert(Bar); for (const auto &User : Bar->users()) { @@ -295,18 +283,9 @@ TEST_F(TaintConfigTest, Basic_02_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); - llvm::outs() << "Test 1\n"; - llvm::outs().flush(); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << "Test 2\n"; - llvm::outs().flush(); - llvm::outs() << TConfig << '\n'; const llvm::Value *I1 = IR.getInstruction(7); - llvm::outs() << "Test 3\n"; - llvm::outs().flush(); const llvm::Value *I2 = IR.getInstruction(18); - llvm::outs() << "Test 4\n"; - llvm::outs().flush(); ASSERT_TRUE(TConfig.isSource(I1)); ASSERT_TRUE(TConfig.isSource(I2)); } @@ -336,8 +315,6 @@ TEST_F(TaintConfigTest, Basic_04_Json) { psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; - llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(2); ASSERT_TRUE(TConfig.isSource(I)); } @@ -349,8 +326,6 @@ TEST_F(TaintConfigTest, DataMember_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; - llvm::outs().flush(); const llvm::Value *I = IR.getInstruction(17); // IR.emitPreprocessedIR(llvm::outs(), false); ASSERT_TRUE(TConfig.isSource(I)); @@ -375,15 +350,12 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { } } else if (F->getName().contains("bar")) { assert(F); - ASSERT_TRUE(TConfig.isSink(F->getArg(1))); + ASSERT_TRUE(TConfig.isSink(F->getArg(0))); } } } TEST_F(TaintConfigTest, FunMember_02_Json) { - llvm::outs() << "start\n"; - llvm::outs().flush(); - const std::string File = "fun_member_02_cpp_dbg.ll"; const std::string Config = "fun_member_02_config.json"; @@ -425,8 +397,7 @@ TEST_F(TaintConfigTest, NameMangling_01_Json) { psr::parseTaintConfig(PathToJsonTaintConfigTestCode + Config); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - llvm::outs() << TConfig << '\n'; - llvm::outs().flush(); + for (const auto *F : IR.getAllFunctions()) { std::string FName = getFunctionName(llvm::demangle(F->getName().str())); if (FName == "foo") { From d072b6be9f20376dcfae263edf71580ca8c90dc6 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 6 Sep 2023 10:33:33 +0200 Subject: [PATCH 20/26] pre-commit stuff --- .../DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h | 2 +- lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp | 2 +- unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index a0778fbbc..8a9979b52 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -354,4 +354,4 @@ class IDEExtendedTaintAnalysis : public XTaint::IDEExtendedTaintAnalysis { } // namespace psr -#endif \ No newline at end of file +#endif diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 411535ffc..640c9702a 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -491,4 +491,4 @@ void LLVMTaintConfig::printImpl(llvm::raw_ostream &OS) const { } template class TaintConfigBase; -} // namespace psr \ No newline at end of file +} // namespace psr diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index f42649f45..8d3c6285f 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -465,4 +465,4 @@ TEST_F(TaintConfigTest, StaticFun_02_Json) { int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} From 69fb508c3cac4e26a6f397a4161192d106e0905b Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 6 Sep 2023 17:05:36 +0200 Subject: [PATCH 21/26] review changes + unittest fixed --- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 2 - .../TaintConfig/TaintConfigBase.cpp | 130 +++++++++++++++- .../TaintConfig/TaintConfigData.cpp | 145 ------------------ .../Problems/IDEExtendedTaintAnalysisTest.cpp | 28 +++- .../TaintConfig/TaintConfigTest.cpp | 2 +- 5 files changed, 150 insertions(+), 157 deletions(-) delete mode 100644 lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 622f88eb9..db5779228 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -38,8 +38,6 @@ struct VariableData { }; struct TaintConfigData { - explicit TaintConfigData(const std::string &Filepath); - std::vector Functions; std::vector Variables; }; diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index ac650c864..2c32c622e 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -1,5 +1,6 @@ #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/Utils/IO.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/NlohmannLogging.h" @@ -43,7 +44,134 @@ psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { return std::move(*Ret); } +void handleFunctions(const nlohmann::json &JSON, + std::vector &Container) { + if (JSON.contains("functions")) { + for (const auto &Func : JSON["functions"]) { + psr::FunctionData Data = psr::FunctionData(); + bool FuncPushBackFlag = false; + + if (Func.contains("name")) { + Data.Name = Func["name"].get(); + FuncPushBackFlag = true; + } + + if (Func.contains("ret")) { + Data.ReturnType = Func["ret"].get(); + FuncPushBackFlag = true; + } + + if (Func.contains("params") && Func["params"].contains("source")) { + for (const auto &Curr : Func["params"]["source"]) { + Data.SourceValues.push_back(Curr.get()); + } + FuncPushBackFlag = true; + } + + if (Func.contains("params") && Func["params"].contains("sink")) { + for (const auto &Curr : Func["params"]["sink"]) { + if (Curr.is_number()) { + Data.SinkValues.push_back(Curr.get()); + } else if (Curr.is_string() && Curr.get() == "all") { + Data.HasAllSinkParam = true; + } else { + llvm::errs() << "[TaintConfigData::TaintConfigData()]: " + "Unknown sink string parameter!"; + } + } + FuncPushBackFlag = true; + } + + if (Func.contains("params") && Func["params"].contains("sanitizer")) { + for (const auto &Curr : Func["params"]["sanitizer"]) { + Data.SanitizerValues.push_back(Curr.get()); + } + FuncPushBackFlag = true; + } + + if (FuncPushBackFlag) { + Container.push_back(std::move(Data)); + } + } + } +} + +void handleVariables(const nlohmann::json &JSON, + std::vector &Container) { + if (JSON.contains("variables")) { + for (const auto &Var : JSON["variables"]) { + psr::VariableData Data = psr::VariableData(); + bool VarPushBackFlag = false; + if (Var.contains("line")) { + Data.Line = Var["line"].get(); + VarPushBackFlag = true; + } + + if (Var.contains("name")) { + Data.Name = Var["name"].get(); + VarPushBackFlag = true; + } + + if (Var.contains("scope")) { + Data.Scope = Var["scope"].get(); + VarPushBackFlag = true; + } + + if (Var.contains("cat")) { + Data.Cat = Var["cat"].get(); + VarPushBackFlag = true; + } + if (VarPushBackFlag) { + Container.push_back(std::move(Data)); + } + } + } +} + std::optional psr::parseTaintConfigOrNull(const llvm::Twine &Path) { - return TaintConfigData(Path.str()); + std::optional TaintConfig = readJsonFile(Path); + nlohmann::json_schema::json_validator Validator; + try { + static const nlohmann::json TaintConfigSchema = +#include "../config/TaintConfigSchema.json" + ; + + Validator.set_root_schema(TaintConfigSchema); // insert root-schema + } catch (const std::exception &E) { + PHASAR_LOG_LEVEL(ERROR, + "Validation of schema failed, here is why: " << E.what()); + return std::nullopt; + } + + // a custom error handler + class CustomJsonErrorHandler + : public nlohmann::json_schema::basic_error_handler { + void error(const nlohmann::json::json_pointer &Pointer, + const nlohmann::json &Instance, + const std::string &Message) override { + nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, + Message); + PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() + << "' - '" << Instance << "': " << Message); + } + }; + + CustomJsonErrorHandler Err; + Validator.validate(*TaintConfig, Err); + if (Err) { + TaintConfig.reset(); + return std::nullopt; + } + + nlohmann::json Config = *TaintConfig; + TaintConfigData Data; + + std::vector Functions; + handleFunctions(Config, Data.Functions); + + std::vector Variables; + handleVariables(Config, Data.Variables); + + return Data; } diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp deleted file mode 100644 index d90f26ccd..000000000 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" - -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" -#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/IO.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/NlohmannLogging.h" - -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/raw_ostream.h" - -#include "nlohmann/json-schema.hpp" -#include "nlohmann/json.hpp" - -#include - -namespace psr { - -TaintConfigData::TaintConfigData(const std::string &Filepath) { - std::optional TaintConfig = readJsonFile(Filepath); - nlohmann::json_schema::json_validator Validator; - try { - static const nlohmann::json TaintConfigSchema = -#include "../config/TaintConfigSchema.json" - ; - - Validator.set_root_schema(TaintConfigSchema); // insert root-schema - } catch (const std::exception &E) { - PHASAR_LOG_LEVEL(ERROR, - "Validation of schema failed, here is why: " << E.what()); - return; - } - - // a custom error handler - class CustomJsonErrorHandler - : public nlohmann::json_schema::basic_error_handler { - void error(const nlohmann::json::json_pointer &Pointer, - const nlohmann::json &Instance, - const std::string &Message) override { - nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, - Message); - PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() - << "' - '" << Instance << "': " << Message); - } - }; - - CustomJsonErrorHandler Err; - Validator.validate(*TaintConfig, Err); - if (Err) { - TaintConfig.reset(); - return; - } - - if (!TaintConfig) { - llvm::errs() - << "[TaintConfigData::TaintConfigData()]: TaintConfigData is null!"; - return; - }; - - nlohmann::json Config = *TaintConfig; - - // handle functions - if (Config.contains("functions")) { - for (const auto &Func : Config["functions"]) { - FunctionData Data = FunctionData(); - bool FuncPushBackFlag = false; - - if (Func.contains("name")) { - Data.Name = Func["name"].get(); - FuncPushBackFlag = true; - } - - if (Func.contains("ret")) { - Data.ReturnType = Func["ret"].get(); - FuncPushBackFlag = true; - } - - if (Func.contains("params") && Func["params"].contains("source")) { - for (const auto &Curr : Func["params"]["source"]) { - Data.SourceValues.push_back(Curr.get()); - } - FuncPushBackFlag = true; - } - - if (Func.contains("params") && Func["params"].contains("sink")) { - for (const auto &Curr : Func["params"]["sink"]) { - if (Curr.is_number()) { - Data.SinkValues.push_back(Curr.get()); - } else if (Curr.is_string() && Curr.get() == "all") { - Data.HasAllSinkParam = true; - } else { - llvm::errs() << "[TaintConfigData::TaintConfigData()]: " - "Unknown sink string parameter!"; - } - } - FuncPushBackFlag = true; - } - - if (Func.contains("params") && Func["params"].contains("sanitizer")) { - for (const auto &Curr : Func["params"]["sanitizer"]) { - Data.SanitizerValues.push_back(Curr.get()); - } - FuncPushBackFlag = true; - } - - if (FuncPushBackFlag) { - Functions.push_back(std::move(Data)); - } - } - } - - // handle variables - if (Config.contains("variables")) { - for (const auto &Var : Config["variables"]) { - VariableData Data = VariableData(); - bool VarPushBackFlag = false; - if (Var.contains("line")) { - Data.Line = Var["line"].get(); - VarPushBackFlag = true; - } - - if (Var.contains("name")) { - Data.Name = Var["name"].get(); - VarPushBackFlag = true; - } - - if (Var.contains("scope")) { - Data.Scope = Var["scope"].get(); - VarPushBackFlag = true; - } - - if (Var.contains("cat")) { - Data.Cat = Var["cat"].get(); - VarPushBackFlag = true; - } - if (VarPushBackFlag) { - Variables.push_back(std::move(Data)); - } - } - } -} - -} // namespace psr diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index f7d1a827a..7c116d2af 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -54,20 +54,19 @@ class IDETaintAnalysisTest : public ::testing::Test { IDETaintAnalysisTest() = default; ~IDETaintAnalysisTest() override = default; - void doAnalysis(const llvm::Twine &IRFile, - const map> &GroundTruth, - std::variant Config, - bool DumpResults = false) { + void doAnalysis( + const llvm::Twine &IRFile, const map> &GroundTruth, + std::variant Config, + bool DumpResults = false) { HelperAnalyses HA(IRFile, EntryPoints); auto TC = std::visit(Overloaded{[&](std::monostate) { return LLVMTaintConfig(HA.getProjectIRDB()); }, - [&](json *JS) { - TaintConfigData Data = TaintConfigData(*JS); + [&](TaintConfigData *JS) { LLVMTaintConfig Ret = - LLVMTaintConfig(HA.getProjectIRDB(), Data); + LLVMTaintConfig(HA.getProjectIRDB(), *JS); if (DumpResults) { llvm::errs() << Ret << "\n"; } @@ -122,7 +121,20 @@ TEST_F(IDETaintAnalysisTest, XTaint01_Json) { Gt[7] = {"6"}; - json Config = R"!({ + TaintConfigData Config; + + FunctionData FuncDataMain; + FuncDataMain.Name = "main"; + FuncDataMain.SourceValues.push_back(0); + + FunctionData FuncDataPrint; + FuncDataPrint.Name = "_Z5printi"; + FuncDataPrint.SinkValues.push_back(0); + + Config.Functions.push_back(FuncDataMain); + Config.Functions.push_back(FuncDataPrint); + + json Old = R"!({ "name": "XTaintTest", "version": 1.0, "functions": [ diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 8d3c6285f..39674e0bc 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -230,7 +230,7 @@ TEST_F(TaintConfigTest, Array_01_Json) { const std::string Config = "array_01_config.json"; auto JsonConfig = - psr::TaintConfigData({PathToJsonTaintConfigTestCode.str() + Config}); + psr::parseTaintConfig({PathToJsonTaintConfigTestCode.str() + Config}); psr::LLVMProjectIRDB IR({PathToJsonTaintConfigTestCode + File}); From d1561eca6703eedeb647e68f67b2d11bcd758397 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 8 Sep 2023 20:27:35 +0200 Subject: [PATCH 22/26] added static to handle functions --- lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 2c32c622e..5a8db9212 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -44,8 +44,8 @@ psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { return std::move(*Ret); } -void handleFunctions(const nlohmann::json &JSON, - std::vector &Container) { +static void handleFunctions(const nlohmann::json &JSON, + std::vector &Container) { if (JSON.contains("functions")) { for (const auto &Func : JSON["functions"]) { psr::FunctionData Data = psr::FunctionData(); @@ -96,8 +96,8 @@ void handleFunctions(const nlohmann::json &JSON, } } -void handleVariables(const nlohmann::json &JSON, - std::vector &Container) { +static void handleVariables(const nlohmann::json &JSON, + std::vector &Container) { if (JSON.contains("variables")) { for (const auto &Var : JSON["variables"]) { psr::VariableData Data = psr::VariableData(); From 6fb5b30d85f5d6b01e39d45db93b85c6aa8021c8 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 Sep 2023 14:06:08 +0200 Subject: [PATCH 23/26] cleanup --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 2 +- .../PhasarLLVM/TaintConfig/TaintConfigBase.h | 7 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 15 +- lib/Controller/AnalysisController.cpp | 17 +- lib/PhasarLLVM/TaintConfig/CMakeLists.txt | 2 + .../TaintConfig/LLVMTaintConfig.cpp | 9 +- .../TaintConfig/TaintConfigBase.cpp | 237 ++++++++++-------- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 29 +-- .../TaintConfig/TaintConfigTest.cpp | 6 +- 9 files changed, 168 insertions(+), 156 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index 9fda92774..31115da09 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -19,7 +19,7 @@ namespace psr { class LLVMTaintConfig; class LLVMProjectIRDB; -class TaintConfigData; +struct TaintConfigData; template <> struct TaintConfigTraits { using n_t = const llvm::Instruction *; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h index 478c8df9d..422a06a5b 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h @@ -25,7 +25,7 @@ namespace psr { -enum class TaintCategory { Source, Sink, Sanitizer, None }; +enum class TaintCategory { None, Source, Sink, Sanitizer }; [[nodiscard]] llvm::StringRef to_string(TaintCategory Cat) noexcept; [[nodiscard]] TaintCategory toTaintCategory(llvm::StringRef Str) noexcept; @@ -158,8 +158,9 @@ template class TaintConfigBase { //===----------------------------------------------------------------------===// // Miscellaneous helper functions -TaintConfigData parseTaintConfig(const llvm::Twine &Path); -std::optional parseTaintConfigOrNull(const llvm::Twine &Path); +[[nodiscard]] TaintConfigData parseTaintConfig(const llvm::Twine &Path); +[[nodiscard]] std::optional +parseTaintConfigOrNull(const llvm::Twine &Path) noexcept; } // namespace psr diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index db5779228..1607f22e2 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -15,26 +15,27 @@ namespace psr { struct TaintConfigData; +enum class TaintCategory; class LLVMProjectIRDB; struct FunctionData { - FunctionData() = default; + FunctionData() noexcept = default; std::string Name; - std::string ReturnType; - std::vector SourceValues; - std::vector SinkValues; - std::vector SanitizerValues; + TaintCategory ReturnCat{}; + std::vector SourceValues; + std::vector SinkValues; + std::vector SanitizerValues; bool HasAllSinkParam = false; }; struct VariableData { - VariableData() = default; + VariableData() noexcept = default; size_t Line{}; std::string Name; std::string Scope; - std::string Cat; + TaintCategory Cat{}; }; struct TaintConfigData { diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index a782ef809..1341319a0 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -15,8 +15,8 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/HelperAnalyses.h" #include "phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" +#include "phasar/Utils/NlohmannLogging.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/STLExtras.h" @@ -185,6 +185,10 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { WithResultFileOrStdout("/psr-cg.txt", [this](auto &OS) { HA.getICFG().print(OS); }); } + if (EmitterOptions & AnalysisControllerEmitterOptions::EmitCGAsJson) { + WithResultFileOrStdout( + "/psr-cg.json", [this](auto &OS) { OS << HA.getICFG().getAsJson(); }); + } if (EmitterOptions & (AnalysisControllerEmitterOptions::EmitStatisticsAsJson | @@ -194,6 +198,11 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { GeneralStatisticsAnalysis GSA; const auto &Stats = GSA.runOnModule(*IRDB.getModule()); + if (EmitterOptions & + AnalysisControllerEmitterOptions::EmitStatisticsAsText) { + llvm::outs() << Stats << '\n'; + } + if (EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsJson) { WithResultFileOrStdout("/psr-IrStatistics.json", @@ -205,8 +214,10 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { LLVMTaintConfig AnalysisController::makeTaintConfig() { std::string AnalysisConfigPath = !AnalysisConfigs.empty() ? AnalysisConfigs[0] : ""; - - return LLVMTaintConfig(HA.getProjectIRDB()); + return !AnalysisConfigPath.empty() + ? LLVMTaintConfig(HA.getProjectIRDB(), + parseTaintConfig(AnalysisConfigPath)) + : LLVMTaintConfig(HA.getProjectIRDB()); } } // namespace psr diff --git a/lib/PhasarLLVM/TaintConfig/CMakeLists.txt b/lib/PhasarLLVM/TaintConfig/CMakeLists.txt index f795ea837..81d9ad6a8 100644 --- a/lib/PhasarLLVM/TaintConfig/CMakeLists.txt +++ b/lib/PhasarLLVM/TaintConfig/CMakeLists.txt @@ -5,6 +5,8 @@ set(PHASAR_LINK_LIBS phasar_db phasar_llvm_db phasar_llvm_utils + phasar_controlflow + phasar_llvm_controlflow ) set(LLVM_LINK_COMPONENTS diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 640c9702a..a448f43f1 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -9,6 +9,7 @@ #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include "phasar/PhasarLLVM/Utils/Annotation.h" @@ -114,7 +115,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, } // handle a function's return value for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc.ReturnType); + addTaintCategory(User, FunDesc.ReturnCat); } } } @@ -460,8 +461,10 @@ LLVMTaintConfig::makeInitialSeedsImpl() const { InitialSeeds[Inst].insert(Inst); } else if (const auto *Arg = llvm::dyn_cast(SourceValue); Arg && !Arg->getParent()->isDeclaration()) { - const auto *FunFirstInst = &Arg->getParent()->getEntryBlock().front(); - InitialSeeds[FunFirstInst].insert(Arg); + LLVMBasedCFG C; + for (const auto *SP : C.getStartPointsOf(Arg->getParent())) { + InitialSeeds[SP].insert(Arg); + } } } return InitialSeeds; diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 5a8db9212..4fafd4914 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -11,6 +11,9 @@ #include "nlohmann/json-schema.hpp" #include +#include + +#include llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { switch (Cat) { @@ -34,144 +37,160 @@ psr::TaintCategory psr::toTaintCategory(llvm::StringRef Str) noexcept { .Default(TaintCategory::None); } -psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { - auto Ret = parseTaintConfigOrNull(Path); - if (!Ret) { - /*TODO: assertion oder error thrown*/ - llvm::errs() << "ERROR: TaintConfigData is null\n"; - abort(); - } - return std::move(*Ret); -} +static std::optional loadFunc(const nlohmann::json &Func) { + std::optional Data; + Data.emplace(); + bool FunctionNonEmpty = false; -static void handleFunctions(const nlohmann::json &JSON, - std::vector &Container) { - if (JSON.contains("functions")) { - for (const auto &Func : JSON["functions"]) { - psr::FunctionData Data = psr::FunctionData(); - bool FuncPushBackFlag = false; + if (auto NameIt = Func.find("name"); NameIt != Func.end()) { + Data->Name = NameIt->get(); + FunctionNonEmpty = true; + } - if (Func.contains("name")) { - Data.Name = Func["name"].get(); - FuncPushBackFlag = true; - } + if (auto RetIt = Func.find("ret"); RetIt != Func.end()) { + Data->ReturnCat = psr::toTaintCategory(RetIt->get()); + if (Data->ReturnCat == psr::TaintCategory::None) { + throw std::runtime_error( + "Invalid taint category: '" + RetIt->get() + + "'; Must be one of 'source', 'sink' or 'sanitizer'"); + } + FunctionNonEmpty = true; + } - if (Func.contains("ret")) { - Data.ReturnType = Func["ret"].get(); - FuncPushBackFlag = true; + auto ParamsIt = Func.find("params"); + if (ParamsIt != Func.end()) { + const auto &Params = *ParamsIt; + if (auto SrcIt = Params.find("source"); SrcIt != Params.end()) { + for (const auto &Curr : *SrcIt) { + Data->SourceValues.push_back(Curr.get()); + FunctionNonEmpty = true; } + } - if (Func.contains("params") && Func["params"].contains("source")) { - for (const auto &Curr : Func["params"]["source"]) { - Data.SourceValues.push_back(Curr.get()); + if (auto SinkIt = Params.find("sink"); SinkIt != Params.end()) { + for (const auto &Curr : Func["params"]["sink"]) { + if (Curr.is_number()) { + Data->SinkValues.push_back(Curr.get()); + } else if (Curr == "all") { + Data->HasAllSinkParam = true; + } else { + throw std::runtime_error("[TaintConfigData::TaintConfigData()]: " + "Unknown sink string parameter!"); } - FuncPushBackFlag = true; + FunctionNonEmpty = true; } + } - if (Func.contains("params") && Func["params"].contains("sink")) { - for (const auto &Curr : Func["params"]["sink"]) { - if (Curr.is_number()) { - Data.SinkValues.push_back(Curr.get()); - } else if (Curr.is_string() && Curr.get() == "all") { - Data.HasAllSinkParam = true; - } else { - llvm::errs() << "[TaintConfigData::TaintConfigData()]: " - "Unknown sink string parameter!"; - } - } - FuncPushBackFlag = true; + if (auto SanIt = Params.find("sanitizer"); SanIt != Params.end()) { + for (const auto &Curr : *SanIt) { + Data->SanitizerValues.push_back(Curr.get()); + FunctionNonEmpty = true; } + } + } - if (Func.contains("params") && Func["params"].contains("sanitizer")) { - for (const auto &Curr : Func["params"]["sanitizer"]) { - Data.SanitizerValues.push_back(Curr.get()); - } - FuncPushBackFlag = true; - } + if (!FunctionNonEmpty) { + Data.reset(); + } - if (FuncPushBackFlag) { - Container.push_back(std::move(Data)); - } + return Data; +} + +static std::optional loadVar(const nlohmann::json &Var) { + std::optional Data; + Data.emplace(); + + bool VarNonEmpty = false; + if (auto LineIt = Var.find("line"); LineIt != Var.end()) { + Data->Line = LineIt->get(); + VarNonEmpty = true; + } + + if (auto NameIt = Var.find("name"); NameIt != Var.end()) { + Data->Name = NameIt->get(); + VarNonEmpty = true; + } + + if (auto ScopeIt = Var.find("scope"); ScopeIt != Var.end()) { + Data->Scope = ScopeIt->get(); + VarNonEmpty = true; + } + + if (auto CatIt = Var.find("cat"); CatIt != Var.end()) { + Data->Cat = psr::toTaintCategory(CatIt->get()); + if (Data->Cat == psr::TaintCategory::None) { + throw std::runtime_error( + "Invalid taint category: '" + CatIt->get() + + "'; Must be one of 'source', 'sink' or 'sanitizer'"); } + VarNonEmpty = true; } -} -static void handleVariables(const nlohmann::json &JSON, - std::vector &Container) { - if (JSON.contains("variables")) { - for (const auto &Var : JSON["variables"]) { - psr::VariableData Data = psr::VariableData(); - bool VarPushBackFlag = false; - if (Var.contains("line")) { - Data.Line = Var["line"].get(); - VarPushBackFlag = true; - } + if (!VarNonEmpty) { + Data.reset(); + } - if (Var.contains("name")) { - Data.Name = Var["name"].get(); - VarPushBackFlag = true; - } + return Data; +} - if (Var.contains("scope")) { - Data.Scope = Var["scope"].get(); - VarPushBackFlag = true; - } +static void loadFunctions(const nlohmann::json &JSON, + std::vector &Into) { + auto It = JSON.find("functions"); + if (It == JSON.end()) { + return; + } - if (Var.contains("cat")) { - Data.Cat = Var["cat"].get(); - VarPushBackFlag = true; - } - if (VarPushBackFlag) { - Container.push_back(std::move(Data)); - } + for (const auto &Func : *It) { + if (auto FuncData = loadFunc(Func)) { + Into.push_back(std::move(*FuncData)); } } } -std::optional -psr::parseTaintConfigOrNull(const llvm::Twine &Path) { - std::optional TaintConfig = readJsonFile(Path); - nlohmann::json_schema::json_validator Validator; - try { - static const nlohmann::json TaintConfigSchema = -#include "../config/TaintConfigSchema.json" - ; +static void loadVariables(const nlohmann::json &JSON, + std::vector &Into) { - Validator.set_root_schema(TaintConfigSchema); // insert root-schema - } catch (const std::exception &E) { - PHASAR_LOG_LEVEL(ERROR, - "Validation of schema failed, here is why: " << E.what()); - return std::nullopt; + auto It = JSON.find("variables"); + if (It == JSON.end()) { + return; } - // a custom error handler - class CustomJsonErrorHandler - : public nlohmann::json_schema::basic_error_handler { - void error(const nlohmann::json::json_pointer &Pointer, - const nlohmann::json &Instance, - const std::string &Message) override { - nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, - Message); - PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() - << "' - '" << Instance << "': " << Message); + for (const auto &Var : *It) { + if (auto VarData = loadVar(Var)) { + Into.push_back(std::move(*VarData)); } - }; - - CustomJsonErrorHandler Err; - Validator.validate(*TaintConfig, Err); - if (Err) { - TaintConfig.reset(); - return std::nullopt; } +} - nlohmann::json Config = *TaintConfig; - TaintConfigData Data; +psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { + static const nlohmann::json TaintConfigSchema = +#include "../config/TaintConfigSchema.json" + ; + + std::optional TaintConfig = readJsonFile(Path); + nlohmann::json_schema::json_validator Validator; - std::vector Functions; - handleFunctions(Config, Data.Functions); + Validator.set_root_schema(TaintConfigSchema); + Validator.validate(*TaintConfig); + + nlohmann::json Config = *TaintConfig; - std::vector Variables; - handleVariables(Config, Data.Variables); + TaintConfigData Data{}; + loadFunctions(Config, Data.Functions); + loadVariables(Config, Data.Variables); return Data; } + +std::optional +psr::parseTaintConfigOrNull(const llvm::Twine &Path) noexcept { + try { + return parseTaintConfig(Path); + } catch (std::exception &Exc) { + PHASAR_LOG_LEVEL(ERROR, "parseTaintConfig failed: " << Exc.what()); + return std::nullopt; + } catch (...) { + PHASAR_LOG_LEVEL(ERROR, "parseTaintConfig failed with unknown error"); + return std::nullopt; + } +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 7c116d2af..806cb58e2 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -131,33 +131,8 @@ TEST_F(IDETaintAnalysisTest, XTaint01_Json) { FuncDataPrint.Name = "_Z5printi"; FuncDataPrint.SinkValues.push_back(0); - Config.Functions.push_back(FuncDataMain); - Config.Functions.push_back(FuncDataPrint); - - json Old = R"!({ - "name": "XTaintTest", - "version": 1.0, - "functions": [ - { - "file": "xtaint01.cpp", - "name": "main", - "params": { - "source": [ - 0 - ] - } - }, - { - "file": "xtaint01.cpp", - "name": "_Z5printi", - "params": { - "sink": [ - 0 - ] - } - } - ] - })!"_json; + Config.Functions.push_back(std::move(FuncDataMain)); + Config.Functions.push_back(std::move(FuncDataPrint)); doAnalysis({PathToLLFiles + "xtaint01_json_cpp_dbg.ll"}, Gt, &Config); } diff --git a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp index 39674e0bc..bc722d66e 100644 --- a/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp +++ b/unittests/PhasarLLVM/TaintConfig/TaintConfigTest.cpp @@ -236,7 +236,7 @@ TEST_F(TaintConfigTest, Array_01_Json) { // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); - ; + const llvm::Value *I = IR.getInstruction(3); ASSERT_TRUE(TConfig.isSource(I)); } @@ -340,10 +340,10 @@ TEST_F(TaintConfigTest, FunMember_01_Json) { // IR.emitPreprocessedIR(llvm::outs(), false); psr::LLVMTaintConfig TConfig(IR, JsonConfig); llvm::outs() << TConfig << '\n'; - for (const auto &F : IR.getAllFunctions()) { + for (const auto *F : IR.getAllFunctions()) { if (F->getName().contains("foo")) { assert(F); - for (const auto *const User : F->users()) { + for (const auto *User : F->users()) { if (llvm::isa(User)) { ASSERT_TRUE(TConfig.isSource(User)); } From f6acc3cebf01f6d9e74b00e740da148c319dbac7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 Sep 2023 14:10:28 +0200 Subject: [PATCH 24/26] minor --- include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h | 1 - lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 1607f22e2..a1e8184bd 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -16,7 +16,6 @@ namespace psr { struct TaintConfigData; enum class TaintCategory; -class LLVMProjectIRDB; struct FunctionData { FunctionData() noexcept = default; diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 4fafd4914..d654495d0 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -9,12 +9,11 @@ #include "llvm/Support/ErrorHandling.h" #include "nlohmann/json-schema.hpp" +#include "nlohmann/json.hpp" #include #include -#include - llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { switch (Cat) { case TaintCategory::Source: From 27bb560e57f39837fde14ebfc216b911e65786b7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 Sep 2023 14:35:14 +0200 Subject: [PATCH 25/26] Pin swift version --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e4c32f0b..ccf529334 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,6 +55,8 @@ jobs: libclang-rt-14-dev - uses: swift-actions/setup-swift@v1 + with: + swift-version: "5.8.1" - name: Building Phasar in ${{ matrix.build }} with ${{ matrix.compiler[0] }} env: BUILD_TYPE: ${{ matrix.build }} From e2b506ab5b0d1afe7b740000a4530a0b1e5dbe30 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Mon, 2 Oct 2023 18:58:12 +0200 Subject: [PATCH 26/26] Remove unnecessary forward declaration --- include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index a1e8184bd..99a3896f8 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -14,7 +14,6 @@ #include namespace psr { -struct TaintConfigData; enum class TaintCategory; struct FunctionData {