Skip to content

Commit

Permalink
fully refactored, doesn't compile
Browse files Browse the repository at this point in the history
  • Loading branch information
mxHuber committed Aug 4, 2023
1 parent 53d852b commit 784dcdd
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 227 deletions.
51 changes: 28 additions & 23 deletions include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H

#include <string>
#include <unordered_set>
#include <vector>

namespace psr {
class TaintConfigData;
Expand All @@ -22,32 +22,37 @@ class TaintConfigData {
TaintConfigData() = default;
explicit TaintConfigData(const std::string &Filepath);

const std::unordered_set<std::string> &getAllFunctionRets() const;
const std::unordered_set<std::string> &getAllFunctionParamsSources() const;
const std::unordered_set<std::string> &getAllFunctionParamsSinks() const;
const std::unordered_set<std::string> &getAllFunctionParamsSanitizers() const;
[[nodiscard]] const std::vector<std::string> &getAllFunctionNames() const;
[[nodiscard]] const std::vector<std::string> &getAllFunctionRets() const;
[[nodiscard]] const std::vector<std::string> &
getAllFunctionParamsSources() const;
[[nodiscard]] const std::vector<std::string> &
getAllFunctionParamsSinks() const;
[[nodiscard]] const std::vector<std::string> &
getAllFunctionParamsSanitizers() const;

const std::unordered_set<std::string> &getAllVariableScopes() const;
const std::unordered_set<std::string> &getAllVariableLines() const;
const std::unordered_set<std::string> &getAllVariableCats() const;
const std::unordered_set<std::string> &getAllVariableNames() const;
[[nodiscard]] const std::vector<std::string> &getAllVariableScopes() const;
[[nodiscard]] const std::vector<std::string> &getAllVariableLines() const;
[[nodiscard]] const std::vector<std::string> &getAllVariableCats() const;
[[nodiscard]] const std::vector<std::string> &getAllVariableNames() const;

const std::unordered_set<std::string> &getAllFunctions() const;
const std::unordered_set<std::string> &getAllVariables() const;
[[nodiscard]] const std::vector<std::string> &getAllFunctions() const;
[[nodiscard]] const std::vector<std::string> &getAllVariables() const;

private:
std::unordered_set<std::string> Functions;
std::unordered_set<std::string> Variables;

std::unordered_set<std::string> FunctionRets;
std::unordered_set<std::string> FunctionParamsSources;
std::unordered_set<std::string> FunctionParamsSinks;
std::unordered_set<std::string> FunctionParamsSanitizers;

std::unordered_set<std::string> VariableScopes;
std::unordered_set<std::string> VariableLines;
std::unordered_set<std::string> VariableCats;
std::unordered_set<std::string> VariableNames;
std::vector<std::string> Functions;
std::vector<std::string> Variables;

std::vector<std::string> FunctionNames;
std::vector<std::string> FunctionRets;
std::vector<std::string> FunctionParamSources;
std::vector<std::string> FunctionParamSinks;
std::vector<std::string> FunctionParamSanitizers;

std::vector<std::string> VariableScopes;
std::vector<std::string> VariableLines;
std::vector<std::string> VariableCats;
std::vector<std::string> VariableNames;
};

} // namespace psr
Expand Down
181 changes: 56 additions & 125 deletions lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@

#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h"
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h"
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h"
#include "phasar/PhasarLLVM/Utils/Annotation.h"
#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h"
#include "phasar/Utils/Logger.h"
#include "phasar/Utils/NlohmannLogging.h"

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"

#include <string>

namespace psr {

Expand Down Expand Up @@ -60,145 +61,75 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) {

void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,
const TaintConfigData &Config) {
for (const auto &Source : Config.getAllFunctionParamsSources()) {
auto FnDefs = findAllFunctionDefs(IRDB, Source);
}

int Counter = -1;
for (const auto &FunDesc : Config.getAllFunctions()) {
auto FnDefs = findAllFunctionDefs(IRDB, FunDesc);
Counter++;
auto Name = Config.getAllFunctionNames()[Counter];

auto FnDefs = findAllFunctionDefs(IRDB, Name);

if (FnDefs.empty()) {
llvm::errs() << "WARNING: Cannot retrieve function " << FunDesc << "\n";
llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n";
continue;
}

const auto *Fun = FnDefs[0];

// handle a function's parameters
if (FunDesc.contains("params")) {
auto Params = FunDesc["params"];
if (Params.contains("source")) {
for (unsigned Idx : Params["source"]) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
// Use 'continue' instead of 'break' to get error messages for the
// remaining parameters as well
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
}
// handle a function's source parameters
for (const auto &Param : Config.getAllFunctionParamsSources()) {
unsigned Idx = std::stoi(Param);

if (Idx >= Fun->arg_size()) {
llvm::errs() << "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
// Use 'continue' instead of 'break' to get error messages for the
// remaining parameters as well
continue;
}
if (Params.contains("sink")) {
for (const auto &Idx : Params["sink"]) {
if (Idx.is_number()) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
} else if (Idx.is_string()) {
const auto Sinks = Idx.get<std::string>();
if (Sinks == "all") {
for (const auto &Arg : Fun->args()) {
addTaintCategory(&Arg, TaintCategory::Sink);
}
}
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
}
for (const auto &Param : Config.getAllFunctionParamsSinks()) {
char *Check;
long Converted = strtol(Param.c_str(), &Check, Param.size());

if (!Check) {
unsigned Idx = std::stoi(Param);
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
}
if (Params.contains("sanitizer")) {
for (unsigned Idx : Params["sanitizer"]) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
} else {
if (Param == "all") {
for (const auto &Arg : Fun->args()) {
addTaintCategory(&Arg, TaintCategory::Sink);
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
}
}
}
// handle a function's return value
if (FunDesc.contains("ret")) {
for (const auto &User : Fun->users()) {
addTaintCategory(User, FunDesc["ret"].get<std::string>());
}
}
}
}

LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
const psr::TaintConfigData &Config) {
// handle functions
if (Config.hasFunctions()) {
addAllFunctions(Code, Config);
}

// handle variables
if (Config.hasVariables()) {
// scope can be a function name or a struct.
std::unordered_map<const llvm::Type *, const TaintConfigData>
StructConfigMap;

// read all struct types from config
for (const auto &VarDesc : Config.getAllVariables()) {
llvm::DebugInfoFinder DIF;
const auto *M = Code.getModule();

DIF.processModule(*M);
for (const auto &Ty : DIF.types()) {
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
Ty->getName().equals(VarDesc["scope"].get<std::string>())) {
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
StructConfigMap.insert(
std::pair<const llvm::Type *, const nlohmann::json>(
LlvmStructTy, VarDesc));
}
for (const auto &Param : Config.getAllFunctionParamsSanitizers()) {
char *Check;
long Converted = strtol(Param.c_str(), &Check, Param.size());
unsigned Idx = std::stoi(Param);

if (!Check) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
}
DIF.reset();
}

// add corresponding Allocas or getElementPtr instructions to the taint
// category
for (const auto &VarDesc : Config.getAllVariables()) {
for (const auto &Fun : Code.getAllFunctions()) {
for (const auto &I : llvm::instructions(Fun)) {
if (const auto *DbgDeclare =
llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
// matching the line number for Allocas
if (LocalVar->getName().equals(VarDesc) &&
LocalVar->getLine() == VarDesc["line"].get<unsigned int>()) {
addTaintCategory(DbgDeclare->getAddress(),
VarDesc["cat"].get<std::string>());
}
} else if (!StructConfigMap.empty()) {
// Ignoring line numbers for getElementPtr instructions
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
const auto *StType = llvm::dyn_cast<llvm::StructType>(
Gep->getPointerOperandType()->getPointerElementType());
if (StType && StructConfigMap.count(StType)) {
const auto VarDesc = StructConfigMap.at(StType);
auto VarName = VarDesc["name"].get<std::string>();
// using substr to cover the edge case in which the same variable
// name is present as a local variable and also as a struct
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
addTaintCategory(Gep, VarDesc["cat"].get<std::string>());
}
}
}
}
}
}
// handle a function's return value
for (const auto &User : Fun->users()) {
addTaintCategory(User, Config.getAllFunctionRets()[Counter]);
}
}
}
Expand Down Expand Up @@ -514,4 +445,4 @@ void LLVMTaintConfig::printImpl(llvm::raw_ostream &OS) const {
}

template class TaintConfigBase<LLVMTaintConfig>;
} // namespace psr
} // namespace psr
Loading

0 comments on commit 784dcdd

Please sign in to comment.