From 53d852b0445ed683b625dfce0f559f22ac8605e7 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 4 Aug 2023 11:13:27 +0200 Subject: [PATCH] new TaintConfigData structure --- .../PhasarLLVM/TaintConfig/LLVMTaintConfig.h | 9 +- .../PhasarLLVM/TaintConfig/TaintConfigBase.h | 7 +- .../PhasarLLVM/TaintConfig/TaintConfigData.h | 61 ++-- lib/Controller/AnalysisController.cpp | 13 - .../TaintConfig/LLVMTaintConfig.cpp | 182 +++++++++- .../TaintConfig/TaintConfigBase.cpp | 41 +-- .../TaintConfig/TaintConfigData.cpp | 324 +++++++----------- 7 files changed, 347 insertions(+), 290 deletions(-) diff --git a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h index c2c56b7ea..9fda92774 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h +++ b/include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h @@ -11,7 +11,6 @@ #define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIG_H #include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "llvm/IR/Instruction.h" @@ -32,7 +31,8 @@ class LLVMTaintConfig : public TaintConfigBase { friend TaintConfigBase; public: - explicit LLVMTaintConfig(const psr::TaintConfigData &Config); + explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const psr::TaintConfigData &Config); explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode); explicit LLVMTaintConfig( TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB, @@ -91,6 +91,11 @@ class LLVMTaintConfig : public TaintConfigBase { void printImpl(llvm::raw_ostream &OS) const; + // --- utilities + + void addAllFunctions(const LLVMProjectIRDB &IRDB, + const TaintConfigData &Config); + // --- data members std::unordered_set SourceValues; diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h index 94979d2af..478c8df9d 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h @@ -10,6 +10,7 @@ #ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h" #include "phasar/Utils/Nullable.h" #include "llvm/ADT/FunctionExtras.h" @@ -17,8 +18,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" -#include "nlohmann/json.hpp" - #include #include #include @@ -159,8 +158,8 @@ template class TaintConfigBase { //===----------------------------------------------------------------------===// // Miscellaneous helper functions -nlohmann::json parseTaintConfig(const llvm::Twine &Path); -std::optional parseTaintConfigOrNull(const llvm::Twine &Path); +TaintConfigData parseTaintConfig(const llvm::Twine &Path); +std::optional parseTaintConfigOrNull(const llvm::Twine &Path); } // namespace psr diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h index 199f391e5..317e9751d 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h @@ -10,49 +10,44 @@ #ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H #define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" - -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Value.h" - +#include #include -#include - namespace psr { class TaintConfigData; class LLVMProjectIRDB; class TaintConfigData { public: - explicit TaintConfigData(const psr::LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - - void addSourceValue(const llvm::Value *V); - void addSinkValue(const llvm::Value *V); - void addSanitizerValue(const llvm::Value *V); - void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr); - void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation); - // --- utilities - - void addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config); - - inline std::unordered_set getAllSourceValues() const { - return SourceValues; - } - inline std::unordered_set getAllSinkValues() const { - return SinkValues; - } - inline std::unordered_set getAllSanitizerValues() const { - return SanitizerValues; - } + TaintConfigData() = default; + explicit TaintConfigData(const std::string &Filepath); + + const std::unordered_set &getAllFunctionRets() const; + const std::unordered_set &getAllFunctionParamsSources() const; + const std::unordered_set &getAllFunctionParamsSinks() const; + const std::unordered_set &getAllFunctionParamsSanitizers() const; + + const std::unordered_set &getAllVariableScopes() const; + const std::unordered_set &getAllVariableLines() const; + const std::unordered_set &getAllVariableCats() const; + const std::unordered_set &getAllVariableNames() const; + + const std::unordered_set &getAllFunctions() const; + const std::unordered_set &getAllVariables() const; private: - std::unordered_set SourceValues; - std::unordered_set SinkValues; - std::unordered_set SanitizerValues; + std::unordered_set Functions; + std::unordered_set Variables; + + std::unordered_set FunctionRets; + std::unordered_set FunctionParamsSources; + std::unordered_set FunctionParamsSinks; + std::unordered_set FunctionParamsSanitizers; + + std::unordered_set VariableScopes; + std::unordered_set VariableLines; + std::unordered_set VariableCats; + std::unordered_set VariableNames; }; } // namespace psr diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index 519293db9..a782ef809 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -194,19 +194,6 @@ void AnalysisController::emitRequestedHelperAnalysisResults() { GeneralStatisticsAnalysis GSA; const auto &Stats = GSA.runOnModule(*IRDB.getModule()); - if (EmitterOptions & - AnalysisControllerEmitterOptions::EmitStatisticsAsText) { - llvm::outs() << "Module " << IRDB.getModule()->getName() << ":\n"; - llvm::outs() << "> LLVM IR instructions:\t" << IRDB.getNumInstructions() - << "\n"; - llvm::outs() << "> Functions:\t\t" << IRDB.getModule()->size() << "\n"; - llvm::outs() << "> Global variables:\t" << IRDB.getModule()->global_size() - << "\n"; - llvm::outs() << "> Alloca instructions:\t" - << Stats.getAllocaInstructions().size() << "\n"; - llvm::outs() << "> Call Sites:\t\t" << Stats.getFunctioncalls() << "\n"; - } - if (EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsJson) { WithResultFileOrStdout("/psr-IrStatistics.json", diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index ef56f86c4..dd1604389 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -20,13 +20,187 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" namespace psr { -LLVMTaintConfig::LLVMTaintConfig(const TaintConfigData &Config) { - SinkValues = Config.getAllSinkValues(); - SourceValues = Config.getAllSourceValues(); - SanitizerValues = Config.getAllSanitizerValues(); +static llvm::SmallVector +findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { + llvm::SmallVector FnDefs; + llvm::DebugInfoFinder DIF; + const auto *M = IRDB.getModule(); + + DIF.processModule(*M); + for (const auto &SubProgram : DIF.subprograms()) { + if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && + (SubProgram->getName() == Name || + SubProgram->getLinkageName() == Name)) { + FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); + } + } + DIF.reset(); + + if (FnDefs.empty()) { + const auto *F = IRDB.getFunction(Name); + if (F) { + FnDefs.push_back(F); + } + } else if (FnDefs.size() > 1) { + llvm::errs() << "The function name '" << Name + << "' is ambiguous. Possible candidates are:\n"; + for (const auto *F : FnDefs) { + llvm::errs() << "> " << F->getName() << "\n"; + } + llvm::errs() << "Please further specify the function's name, such that it " + "becomes unambiguous\n"; + } + + return FnDefs; +} + +void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, + const TaintConfigData &Config) { + for (const auto &Source : Config.getAllFunctionParamsSources()) { + auto FnDefs = findAllFunctionDefs(IRDB, Source); + } + + for (const auto &FunDesc : Config.getAllFunctions()) { + auto FnDefs = findAllFunctionDefs(IRDB, FunDesc); + + if (FnDefs.empty()) { + llvm::errs() << "WARNING: Cannot retrieve function " << FunDesc << "\n"; + continue; + } + + const auto *Fun = FnDefs[0]; + + // handle a function's parameters + if (FunDesc.contains("params")) { + auto Params = FunDesc["params"]; + if (Params.contains("source")) { + for (unsigned Idx : Params["source"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + // Use 'continue' instead of 'break' to get error messages for the + // remaining parameters as well + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Source); + } + } + if (Params.contains("sink")) { + for (const auto &Idx : Params["sink"]) { + if (Idx.is_number()) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); + } else if (Idx.is_string()) { + const auto Sinks = Idx.get(); + if (Sinks == "all") { + for (const auto &Arg : Fun->args()) { + addTaintCategory(&Arg, TaintCategory::Sink); + } + } + } + } + } + if (Params.contains("sanitizer")) { + for (unsigned Idx : Params["sanitizer"]) { + if (Idx >= Fun->arg_size()) { + llvm::errs() + << "ERROR: The source-function parameter index is out of " + "bounds: " + << Idx << "\n"; + continue; + } + addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); + } + } + } + // handle a function's return value + if (FunDesc.contains("ret")) { + for (const auto &User : Fun->users()) { + addTaintCategory(User, FunDesc["ret"].get()); + } + } + } +} + +LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code, + const psr::TaintConfigData &Config) { + // handle functions + if (Config.hasFunctions()) { + addAllFunctions(Code, Config); + } + + // handle variables + if (Config.hasVariables()) { + // scope can be a function name or a struct. + std::unordered_map + StructConfigMap; + + // read all struct types from config + for (const auto &VarDesc : Config.getAllVariables()) { + llvm::DebugInfoFinder DIF; + const auto *M = Code.getModule(); + + DIF.processModule(*M); + for (const auto &Ty : DIF.types()) { + if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && + Ty->getName().equals(VarDesc["scope"].get())) { + for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { + StructConfigMap.insert( + std::pair( + LlvmStructTy, VarDesc)); + } + } + } + DIF.reset(); + } + + // add corresponding Allocas or getElementPtr instructions to the taint + // category + for (const auto &VarDesc : Config.getAllVariables()) { + for (const auto &Fun : Code.getAllFunctions()) { + for (const auto &I : llvm::instructions(Fun)) { + if (const auto *DbgDeclare = + llvm::dyn_cast(&I)) { + const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); + // matching line number with for Allocas + if (LocalVar->getName().equals(VarDesc) && + LocalVar->getLine() == VarDesc["line"].get()) { + addTaintCategory(DbgDeclare->getAddress(), + VarDesc["cat"].get()); + } + } else if (!StructConfigMap.empty()) { + // Ignorning line numbers for getElementPtr instructions + if (const auto *Gep = llvm::dyn_cast(&I)) { + const auto *StType = llvm::dyn_cast( + Gep->getPointerOperandType()->getPointerElementType()); + if (StType && StructConfigMap.count(StType)) { + const auto VarDesc = StructConfigMap.at(StType); + auto VarName = VarDesc["name"].get(); + // using substr to cover the edge case in which same variable + // name is present as a local variable and also as a struct + // member variable. (Ex. JsonConfig/fun_member_02.cpp) + if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { + addTaintCategory(Gep, VarDesc["cat"].get()); + } + } + } + } + } + } + } + } } LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) { diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp index 34e2d2556..01e462415 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigBase.cpp @@ -7,9 +7,6 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include "nlohmann/json-schema.hpp" -#include "nlohmann/json.hpp" - #include llvm::StringRef psr::to_string(TaintCategory Cat) noexcept { @@ -34,46 +31,10 @@ psr::TaintCategory psr::toTaintCategory(llvm::StringRef Str) noexcept { .Default(TaintCategory::None); } -nlohmann::json psr::parseTaintConfig(const llvm::Twine &Path) { +psr::TaintConfigData psr::parseTaintConfig(const llvm::Twine &Path) { auto Ret = parseTaintConfigOrNull(Path); if (!Ret) { return {}; } return std::move(*Ret); } - -std::optional -psr::parseTaintConfigOrNull(const llvm::Twine &Path) { - std::optional TaintConfig = readJsonFile(Path); - nlohmann::json_schema::json_validator Validator; - try { - static const nlohmann::json TaintConfigSchema = -#include "../config/TaintConfigSchema.json" - ; - - Validator.set_root_schema(TaintConfigSchema); // insert root-schema - } catch (const std::exception &E) { - PHASAR_LOG_LEVEL(ERROR, - "Validation of schema failed, here is why: " << E.what()); - return std::nullopt; - } - - // a custom error handler - class CustomJsonErrorHandler - : public nlohmann::json_schema::basic_error_handler { - void error(const nlohmann::json::json_pointer &Pointer, - const nlohmann::json &Instance, - const std::string &Message) override { - nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, - Message); - PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() - << "' - '" << Instance << "': " << Message); - } - }; - CustomJsonErrorHandler Err; - Validator.validate(*TaintConfig, Err); - if (Err) { - TaintConfig.reset(); - } - return TaintConfig; -} diff --git a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp index 78af9635c..1422cd5d3 100644 --- a/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp +++ b/lib/PhasarLLVM/TaintConfig/TaintConfigData.cpp @@ -11,230 +11,166 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" +#include "nlohmann/json-schema.hpp" +#include "nlohmann/json.hpp" + #include namespace psr { -TaintConfigData::TaintConfigData(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - // handle functions - if (Config.contains("functions")) { - addAllFunctions(IRDB, Config); +std::optional +parseTaintConfigOrNull(const llvm::Twine &Path) { + std::optional TaintConfig = readJsonFile(Path); + nlohmann::json_schema::json_validator Validator; + try { + static const nlohmann::json TaintConfigSchema = +#include "../config/TaintConfigSchema.json" + ; + + Validator.set_root_schema(TaintConfigSchema); // insert root-schema + } catch (const std::exception &E) { + PHASAR_LOG_LEVEL(ERROR, + "Validation of schema failed, here is why: " << E.what()); + return std::nullopt; } - // handle variables - if (Config.contains("variables")) { - // scope can be a function name or a struct. - std::unordered_map - StructConfigMap; - - // read all struct types from config - for (const auto &VarDesc : Config["variables"]) { - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); - - DIF.processModule(*M); - for (const auto &Ty : DIF.types()) { - if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type && - Ty->getName().equals(VarDesc["scope"].get())) { - for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) { - StructConfigMap.insert( - std::pair( - LlvmStructTy, VarDesc)); - } - } - } - DIF.reset(); + // a custom error handler + class CustomJsonErrorHandler + : public nlohmann::json_schema::basic_error_handler { + void error(const nlohmann::json::json_pointer &Pointer, + const nlohmann::json &Instance, + const std::string &Message) override { + nlohmann::json_schema::basic_error_handler::error(Pointer, Instance, + Message); + PHASAR_LOG_LEVEL(ERROR, Pointer.to_string() + << "' - '" << Instance << "': " << Message); } + }; + CustomJsonErrorHandler Err; + Validator.validate(*TaintConfig, Err); + if (Err) { + TaintConfig.reset(); + } + return std::optional(Path.str()); +} - // add corresponding Allocas or getElementPtr instructions to the taint - // category - for (const auto &VarDesc : Config["variables"]) { - for (const auto &Fun : IRDB.getAllFunctions()) { - for (const auto &I : llvm::instructions(Fun)) { - if (const auto *DbgDeclare = - llvm::dyn_cast(&I)) { - const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable(); - // matching line number with for Allocas - if (LocalVar->getName().equals( - VarDesc["name"].get()) && - LocalVar->getLine() == VarDesc["line"].get()) { - addTaintCategory(DbgDeclare->getAddress(), - VarDesc["cat"].get()); - } - } else if (!StructConfigMap.empty()) { - // Ignorning line numbers for getElementPtr instructions - if (const auto *Gep = llvm::dyn_cast(&I)) { - const auto *StType = llvm::dyn_cast( - Gep->getPointerOperandType()->getPointerElementType()); - if (StType && StructConfigMap.count(StType)) { - const auto VarDesc = StructConfigMap.at(StType); - auto VarName = VarDesc["name"].get(); - // using substr to cover the edge case in which same variable - // name is present as a local variable and also as a struct - // member variable. (Ex. JsonConfig/fun_member_02.cpp) - if (Gep->getName().substr(0, VarName.size()).equals(VarName)) { - addTaintCategory(Gep, VarDesc["cat"].get()); - } - } - } - } - } - } +void findAndAddValue(const nlohmann::json &Config, const std::string &Value, + std::unordered_set &Container) { + if (Config.contains(Value)) { + for (const auto &Curr : Config[Value]) { + Container.insert(Curr); } } } -static llvm::SmallVector -findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { - llvm::SmallVector FnDefs; - llvm::DebugInfoFinder DIF; - const auto *M = IRDB.getModule(); +void addAllFunctionRets(const nlohmann::json &Function, + std::unordered_set &Container) { + findAndAddValue(Function, "ret", Container); +} - DIF.processModule(*M); - for (const auto &SubProgram : DIF.subprograms()) { - if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() && - (SubProgram->getName() == Name || - SubProgram->getLinkageName() == Name)) { - FnDefs.push_back(IRDB.getFunction(SubProgram->getLinkageName())); - } - } - DIF.reset(); +void addAllFunctionParamsSources(const nlohmann::json &Param, + std::unordered_set &Container) { + findAndAddValue(Param, "source", Container); +} - if (FnDefs.empty()) { - const auto *F = IRDB.getFunction(Name); - if (F) { - FnDefs.push_back(F); - } - } else if (FnDefs.size() > 1) { - llvm::errs() << "The function name '" << Name - << "' is ambiguous. Possible candidates are:\n"; - for (const auto *F : FnDefs) { - llvm::errs() << "> " << F->getName() << "\n"; - } - llvm::errs() << "Please further specify the function's name, such that it " - "becomes unambiguous\n"; - } +void addAllFunctionParamsSinks(const nlohmann::json &Param, + std::unordered_set &Container) { + findAndAddValue(Param, "sink", Container); +} - return FnDefs; +void addAllFunctionParamsSanitizers( + const nlohmann::json &Param, std::unordered_set &Container) { + findAndAddValue(Param, "sanitizer", Container); } -void TaintConfigData::addAllFunctions(const LLVMProjectIRDB &IRDB, - const nlohmann::json &Config) { - for (const auto &FunDesc : Config["functions"]) { - auto Name = FunDesc["name"].get(); +void addAllVariableScopes(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "scope", Container); +} - auto FnDefs = findAllFunctionDefs(IRDB, Name); +void addAllVariableLines(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "line", Container); +} - if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; - continue; - } +void addAllVariableCats(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "cat", Container); +} - const auto *Fun = FnDefs[0]; - - // handle a function's parameters - if (FunDesc.contains("params")) { - auto Params = FunDesc["params"]; - if (Params.contains("source")) { - for (unsigned Idx : Params["source"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - // Use 'continue' instead of 'break' to get error messages for the - // remaining parameters as well - continue; - } - addTaintCategory(Fun->getArg(Idx), "source"); - } - } - if (Params.contains("sink")) { - for (const auto &Idx : Params["sink"]) { - if (Idx.is_number()) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), "Sink"); - } else if (Idx.is_string()) { - const auto Sinks = Idx.get(); - if (Sinks == "all") { - for (const auto &Arg : Fun->args()) { - addTaintCategory(&Arg, "sink"); - } - } - } - } - } - if (Params.contains("sanitizer")) { - for (unsigned Idx : Params["sanitizer"]) { - if (Idx >= Fun->arg_size()) { - llvm::errs() - << "ERROR: The source-function parameter index is out of " - "bounds: " - << Idx << "\n"; - continue; - } - addTaintCategory(Fun->getArg(Idx), "sanitizer"); - } +void addAllVariableNames(const nlohmann::json &Variable, + std::unordered_set &Container) { + findAndAddValue(Variable, "name", Container); +} + +TaintConfigData::TaintConfigData(const std::string &Filepath) { + + nlohmann::json Config(Filepath); + + // handle functions + if (Config.contains("functions")) { + for (auto &Function : Config["functions"]) { + addAllFunctionRets(Function, FunctionRets); + + if (Function.contains("params")) { + addAllFunctionParamsSources(Function["params"], FunctionParamsSources); + addAllFunctionParamsSinks(Function["params"], FunctionParamsSinks); + addAllFunctionParamsSanitizers(Function["params"], + FunctionParamsSanitizers); } } - // handle a function's return value - if (FunDesc.contains("ret")) { - for (const auto &User : Fun->users()) { - addTaintCategory(User, FunDesc["ret"].get()); - } + } + + // handle variables + if (Config.contains("variables")) { + for (auto &Variable : Config["variables"]) { + addAllVariableScopes(Variable, VariableScopes); + addAllVariableLines(Variable, VariableLines); + addAllVariableCats(Variable, VariableCats); + addAllVariableNames(Variable, VariableNames); } } } -// -// --- Own API function implementations -// - -void TaintConfigData::addSourceValue(const llvm::Value *V) { - SourceValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctions() const { + return Functions; } - -void TaintConfigData::addSinkValue(const llvm::Value *V) { - SinkValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctionRets() const { + return FunctionRets; } - -void TaintConfigData::addSanitizerValue(const llvm::Value *V) { - SanitizerValues.insert(V); +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSources() const { + return FunctionParamsSources; } - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - llvm::StringRef AnnotationStr) { - auto TC = toTaintCategory(AnnotationStr); - if (TC == TaintCategory::None) { - PHASAR_LOG_LEVEL(ERROR, "Unknown taint category: " << AnnotationStr); - } else { - addTaintCategory(Val, TC); - } +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSinks() const { + return FunctionParamsSinks; } - -void TaintConfigData::addTaintCategory(const llvm::Value *Val, - TaintCategory Annotation) { - switch (Annotation) { - case TaintCategory::Source: - addSourceValue(Val); - break; - case TaintCategory::Sink: - addSinkValue(Val); - break; - case TaintCategory::Sanitizer: - addSanitizerValue(Val); - break; - default: - // ignore - break; - } +const std::unordered_set & +TaintConfigData::getAllFunctionParamsSanitizers() const { + return FunctionParamsSanitizers; +} +const std::unordered_set & +TaintConfigData::getAllVariables() const { + return Variables; +} +const std::unordered_set & +TaintConfigData::getAllVariableScopes() const { + return VariableScopes; +} +const std::unordered_set & +TaintConfigData::getAllVariableLines() const { + return VariableLines; +} +const std::unordered_set & +TaintConfigData::getAllVariableCats() const { + return VariableCats; +} +const std::unordered_set & +TaintConfigData::getAllVariableNames() const { + return VariableNames; } } // namespace psr \ No newline at end of file