Skip to content

Commit

Permalink
new TaintConfigData structure
Browse files Browse the repository at this point in the history
  • Loading branch information
mxHuber committed Aug 4, 2023
1 parent e5cf0cc commit 53d852b
Show file tree
Hide file tree
Showing 7 changed files with 347 additions and 290 deletions.
9 changes: 7 additions & 2 deletions include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#define PHASAR_PHASARLLVM_TAINTCONFIG_LLVMTAINTCONFIG_H

#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h"
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h"

#include "llvm/IR/Instruction.h"

Expand All @@ -32,7 +31,8 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {
friend TaintConfigBase;

public:
explicit LLVMTaintConfig(const psr::TaintConfigData &Config);
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
const psr::TaintConfigData &Config);
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode);
explicit LLVMTaintConfig(
TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB,
Expand Down Expand Up @@ -91,6 +91,11 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {

void printImpl(llvm::raw_ostream &OS) const;

// --- utilities

void addAllFunctions(const LLVMProjectIRDB &IRDB,
const TaintConfigData &Config);

// --- data members

std::unordered_set<const llvm::Value *> SourceValues;
Expand Down
7 changes: 3 additions & 4 deletions include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H

#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h"
#include "phasar/Utils/Nullable.h"

#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"

#include "nlohmann/json.hpp"

#include <map>
#include <set>
#include <type_traits>
Expand Down Expand Up @@ -159,8 +158,8 @@ template <typename Derived> class TaintConfigBase {
//===----------------------------------------------------------------------===//
// Miscellaneous helper functions

nlohmann::json parseTaintConfig(const llvm::Twine &Path);
std::optional<nlohmann::json> parseTaintConfigOrNull(const llvm::Twine &Path);
TaintConfigData parseTaintConfig(const llvm::Twine &Path);
std::optional<TaintConfigData> parseTaintConfigOrNull(const llvm::Twine &Path);

} // namespace psr

Expand Down
61 changes: 28 additions & 33 deletions include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,44 @@
#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H

#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h"
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h"

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

#include <string>
#include <unordered_set>

#include <nlohmann/json_fwd.hpp>

namespace psr {
class TaintConfigData;
class LLVMProjectIRDB;

class TaintConfigData {
public:
explicit TaintConfigData(const psr::LLVMProjectIRDB &IRDB,
const nlohmann::json &Config);

void addSourceValue(const llvm::Value *V);
void addSinkValue(const llvm::Value *V);
void addSanitizerValue(const llvm::Value *V);
void addTaintCategory(const llvm::Value *Val, llvm::StringRef AnnotationStr);
void addTaintCategory(const llvm::Value *Val, TaintCategory Annotation);
// --- utilities

void addAllFunctions(const LLVMProjectIRDB &IRDB,
const nlohmann::json &Config);

inline std::unordered_set<const llvm::Value *> getAllSourceValues() const {
return SourceValues;
}
inline std::unordered_set<const llvm::Value *> getAllSinkValues() const {
return SinkValues;
}
inline std::unordered_set<const llvm::Value *> getAllSanitizerValues() const {
return SanitizerValues;
}
TaintConfigData() = default;
explicit TaintConfigData(const std::string &Filepath);

const std::unordered_set<std::string> &getAllFunctionRets() const;
const std::unordered_set<std::string> &getAllFunctionParamsSources() const;
const std::unordered_set<std::string> &getAllFunctionParamsSinks() const;
const std::unordered_set<std::string> &getAllFunctionParamsSanitizers() const;

const std::unordered_set<std::string> &getAllVariableScopes() const;
const std::unordered_set<std::string> &getAllVariableLines() const;
const std::unordered_set<std::string> &getAllVariableCats() const;
const std::unordered_set<std::string> &getAllVariableNames() const;

const std::unordered_set<std::string> &getAllFunctions() const;
const std::unordered_set<std::string> &getAllVariables() const;

private:
std::unordered_set<const llvm::Value *> SourceValues;
std::unordered_set<const llvm::Value *> SinkValues;
std::unordered_set<const llvm::Value *> SanitizerValues;
std::unordered_set<std::string> Functions;
std::unordered_set<std::string> Variables;

std::unordered_set<std::string> FunctionRets;
std::unordered_set<std::string> FunctionParamsSources;
std::unordered_set<std::string> FunctionParamsSinks;
std::unordered_set<std::string> FunctionParamsSanitizers;

std::unordered_set<std::string> VariableScopes;
std::unordered_set<std::string> VariableLines;
std::unordered_set<std::string> VariableCats;
std::unordered_set<std::string> VariableNames;
};

} // namespace psr
Expand Down
13 changes: 0 additions & 13 deletions lib/Controller/AnalysisController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,19 +194,6 @@ void AnalysisController::emitRequestedHelperAnalysisResults() {
GeneralStatisticsAnalysis GSA;
const auto &Stats = GSA.runOnModule(*IRDB.getModule());

if (EmitterOptions &
AnalysisControllerEmitterOptions::EmitStatisticsAsText) {
llvm::outs() << "Module " << IRDB.getModule()->getName() << ":\n";
llvm::outs() << "> LLVM IR instructions:\t" << IRDB.getNumInstructions()
<< "\n";
llvm::outs() << "> Functions:\t\t" << IRDB.getModule()->size() << "\n";
llvm::outs() << "> Global variables:\t" << IRDB.getModule()->global_size()
<< "\n";
llvm::outs() << "> Alloca instructions:\t"
<< Stats.getAllocaInstructions().size() << "\n";
llvm::outs() << "> Call Sites:\t\t" << Stats.getFunctioncalls() << "\n";
}

if (EmitterOptions &
AnalysisControllerEmitterOptions::EmitStatisticsAsJson) {
WithResultFileOrStdout("/psr-IrStatistics.json",
Expand Down
182 changes: 178 additions & 4 deletions lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,187 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"

namespace psr {

LLVMTaintConfig::LLVMTaintConfig(const TaintConfigData &Config) {
SinkValues = Config.getAllSinkValues();
SourceValues = Config.getAllSourceValues();
SanitizerValues = Config.getAllSanitizerValues();
/// Collects the functions in \p IRDB that match \p Name.
///
/// Debug-info subprograms are consulted first: a distinct subprogram with a
/// non-empty linkage name matches when either its name or its linkage name
/// equals \p Name; the function is then looked up by linkage name. If that
/// yields nothing, \p Name itself is looked up in \p IRDB. If more than one
/// candidate is found, the ambiguity is reported on llvm::errs() and all
/// candidates are still returned.
///
/// \param IRDB project IR database that is searched
/// \param Name function name (plain or linkage name) to resolve
/// \returns the matching functions; the result never contains null pointers
static llvm::SmallVector<const llvm::Function *>
findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) {
  llvm::SmallVector<const llvm::Function *> FnDefs;
  llvm::DebugInfoFinder DIF;
  const auto *M = IRDB.getModule();

  DIF.processModule(*M);
  for (const auto &SubProgram : DIF.subprograms()) {
    if (SubProgram->isDistinct() && !SubProgram->getLinkageName().empty() &&
        (SubProgram->getName() == Name ||
         SubProgram->getLinkageName() == Name)) {
      // The lookup may come back empty (the fallback below checks for that as
      // well). Skip such entries so that neither the ambiguity report nor the
      // callers dereference a null function.
      if (const auto *F = IRDB.getFunction(SubProgram->getLinkageName())) {
        FnDefs.push_back(F);
      }
    }
  }
  DIF.reset();

  if (FnDefs.empty()) {
    // No usable debug-info match: fall back to a direct lookup by name.
    const auto *F = IRDB.getFunction(Name);
    if (F) {
      FnDefs.push_back(F);
    }
  } else if (FnDefs.size() > 1) {
    llvm::errs() << "The function name '" << Name
                 << "' is ambiguous. Possible candidates are:\n";
    for (const auto *F : FnDefs) {
      llvm::errs() << "> " << F->getName() << "\n";
    }
    llvm::errs() << "Please further specify the function's name, such that it "
                    "becomes unambiguous\n";
  }

  return FnDefs;
}

/// Registers taint categories for every function described in \p Config.
///
/// For each function description the first definition returned by
/// findAllFunctionDefs() is used. Parameter indices listed under
/// "params"/"source", "params"/"sink" and "params"/"sanitizer" are added with
/// the matching TaintCategory; a "sink" entry may also be the string "all",
/// which marks every argument as a sink. If "ret" is present, every user of
/// the function is added with the category named by the "ret" string.
/// Functions that cannot be resolved and out-of-range parameter indices are
/// reported on llvm::errs() and skipped.
///
/// NOTE(review): FunDesc is queried like a JSON object (contains(...),
/// operator[] with string keys). Confirm that the element type produced by
/// TaintConfigData::getAllFunctions() supports this.
///
/// \param IRDB   project IR database used to resolve function names
/// \param Config taint configuration data to read the descriptions from
void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,
                                      const TaintConfigData &Config) {
  // NOTE(review): the lookup result is unused here; the only visible effect of
  // this loop is the ambiguity diagnostic printed by findAllFunctionDefs().
  for (const auto &Source : Config.getAllFunctionParamsSources()) {
    auto FnDefs = findAllFunctionDefs(IRDB, Source);
  }

  for (const auto &FunDesc : Config.getAllFunctions()) {
    auto FnDefs = findAllFunctionDefs(IRDB, FunDesc);

    if (FnDefs.empty()) {
      llvm::errs() << "WARNING: Cannot retrieve function " << FunDesc << "\n";
      continue;
    }

    // Only the first candidate is used, even if the name was ambiguous.
    const auto *Fun = FnDefs[0];

    // handle a function's parameters
    if (FunDesc.contains("params")) {
      auto Params = FunDesc["params"];
      if (Params.contains("source")) {
        for (unsigned Idx : Params["source"]) {
          if (Idx >= Fun->arg_size()) {
            llvm::errs()
                << "ERROR: The source-function parameter index is out of "
                   "bounds: "
                << Idx << "\n";
            // Use 'continue' instead of 'break' to get error messages for the
            // remaining parameters as well
            continue;
          }
          addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
        }
      }
      if (Params.contains("sink")) {
        for (const auto &Idx : Params["sink"]) {
          if (Idx.is_number()) {
            if (Idx >= Fun->arg_size()) {
              // Name the list that actually contains the bad index.
              llvm::errs()
                  << "ERROR: The sink-function parameter index is out of "
                     "bounds: "
                  << Idx << "\n";
              continue;
            }
            addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
          } else if (Idx.is_string()) {
            const auto Sinks = Idx.get<std::string>();
            // "all" marks every argument of the function as a sink; any other
            // string is ignored.
            if (Sinks == "all") {
              for (const auto &Arg : Fun->args()) {
                addTaintCategory(&Arg, TaintCategory::Sink);
              }
            }
          }
        }
      }
      if (Params.contains("sanitizer")) {
        for (unsigned Idx : Params["sanitizer"]) {
          if (Idx >= Fun->arg_size()) {
            // Name the list that actually contains the bad index.
            llvm::errs()
                << "ERROR: The sanitizer-function parameter index is out of "
                   "bounds: "
                << Idx << "\n";
            continue;
          }
          addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
        }
      }
    }
    // handle a function's return value
    if (FunDesc.contains("ret")) {
      for (const auto &User : Fun->users()) {
        addTaintCategory(User, FunDesc["ret"].get<std::string>());
      }
    }
  }
}

/// Builds the taint configuration for the module held by \p Code from the
/// function and variable descriptions in \p Config.
///
/// Functions are delegated to addAllFunctions(). For variables, the body first
/// builds a map from struct types to variable descriptions and then walks every
/// instruction of every function in \p Code: the address of a matching
/// llvm::DbgDeclareInst is categorized, and a llvm::GetElementPtrInst on a
/// mapped struct type is categorized when its name starts with the configured
/// variable name.
///
/// NOTE(review): VarDesc is indexed like a JSON object (["scope"], ["line"],
/// ["cat"], ["name"]) and inserted as nlohmann::json, while StructConfigMap is
/// declared with TaintConfigData as its mapped type. Confirm the intended
/// element type of Config.getAllVariables().
LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
const psr::TaintConfigData &Config) {
// handle functions
if (Config.hasFunctions()) {
addAllFunctions(Code, Config);
}

// handle variables
if (Config.hasVariables()) {
// scope can be a function name or a struct.
std::unordered_map<const llvm::Type *, const TaintConfigData>
StructConfigMap;

// read all struct types from config
for (const auto &VarDesc : Config.getAllVariables()) {
// NOTE(review): the module's debug info is re-processed for every variable
// description; nothing in this loop appears to depend on VarDesc before
// DIF.types() is iterated.
llvm::DebugInfoFinder DIF;
const auto *M = Code.getModule();

DIF.processModule(*M);
for (const auto &Ty : DIF.types()) {
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
Ty->getName().equals(VarDesc["scope"].get<std::string>())) {
// Once a debug-info struct type matches the scope, every identified
// struct type of the module is mapped to this description; insert()
// keeps an already existing entry for a type.
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
StructConfigMap.insert(
std::pair<const llvm::Type *, const nlohmann::json>(
LlvmStructTy, VarDesc));
}
}
}
DIF.reset();
}

// add corresponding Allocas or getElementPtr instructions to the taint
// category
for (const auto &VarDesc : Config.getAllVariables()) {
for (const auto &Fun : Code.getAllFunctions()) {
for (const auto &I : llvm::instructions(Fun)) {
if (const auto *DbgDeclare =
llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
// match both the variable name and the line number for Allocas
if (LocalVar->getName().equals(VarDesc) &&
LocalVar->getLine() == VarDesc["line"].get<unsigned int>()) {
addTaintCategory(DbgDeclare->getAddress(),
VarDesc["cat"].get<std::string>());
}
} else if (!StructConfigMap.empty()) {
// Ignoring line numbers for getElementPtr instructions
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
const auto *StType = llvm::dyn_cast<llvm::StructType>(
Gep->getPointerOperandType()->getPointerElementType());
if (StType && StructConfigMap.count(StType)) {
// This VarDesc shadows the loop variable of the same name: from here
// on the description stored for the struct type is used.
const auto VarDesc = StructConfigMap.at(StType);
auto VarName = VarDesc["name"].get<std::string>();
// using substr to cover the edge case in which same variable
// name is present as a local variable and also as a struct
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
addTaintCategory(Gep, VarDesc["cat"].get<std::string>());
}
}
}
}
}
}
}
}
}

LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode) {
Expand Down
Loading

0 comments on commit 53d852b

Please sign in to comment.