Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Taint Config Serialization #20

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f63b6dc
LLVMTaintConfigYAML class
mxHuber Jul 3, 2023
77d35cd
basic structure
mxHuber Jul 3, 2023
e8b1276
beginning of restructuring
mxHuber Jul 6, 2023
7bf384f
compiles, untested
mxHuber Jul 19, 2023
4b8d770
Constructor init
mxHuber Jul 19, 2023
b1f3d81
minor bug fixes
mxHuber Jul 21, 2023
d51bd3a
new start w better approach
mxHuber Jul 25, 2023
e6e2c30
basic working version
mxHuber Jul 27, 2023
8ce7a13
removed unneccesary includes and functions
mxHuber Jul 27, 2023
135a2b7
new TaintConfigData structure
mxHuber Aug 4, 2023
4ed0efd
fully refactored, doesn't compile
mxHuber Aug 4, 2023
39c1ca2
added func/var structs
mxHuber Aug 17, 2023
595d313
compiling version, tests fail
mxHuber Aug 18, 2023
986eabb
only 3 unittests fail now
mxHuber Aug 24, 2023
3a2909a
fixed a bug with sink values causing a crash
mxHuber Aug 24, 2023
0c83170
all unittests pass
mxHuber Aug 25, 2023
7cba280
review fixes
mxHuber Sep 4, 2023
36a43ef
one faulty unittest remaining
mxHuber Sep 4, 2023
6427805
all unittests pass + myphasartool revert
mxHuber Sep 5, 2023
d072b6b
pre-commit stuff
mxHuber Sep 6, 2023
69fb508
review changes + unittest fixed
mxHuber Sep 6, 2023
d1561ec
added static to handle functions
mxHuber Sep 8, 2023
3d826af
Merge branch 'development' into f-TaintConfigSerialization
mxHuber Sep 13, 2023
6fb5b30
cleanup
fabianbs96 Sep 30, 2023
f6acc3c
minor
fabianbs96 Sep 30, 2023
27bb560
Pin swift version
fabianbs96 Sep 30, 2023
e2b506a
Remove unnecessary forward declaration
fabianbs96 Oct 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ jobs:
libclang-rt-14-dev

- uses: swift-actions/setup-swift@v1
with:
swift-version: "5.8.1"
- name: Building Phasar in ${{ matrix.build }} with ${{ matrix.compiler[0] }}
env:
BUILD_TYPE: ${{ matrix.build }}
Expand Down
5 changes: 3 additions & 2 deletions include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
namespace psr {
class LLVMTaintConfig;
class LLVMProjectIRDB;
struct TaintConfigData;

template <> struct TaintConfigTraits<LLVMTaintConfig> {
using n_t = const llvm::Instruction *;
Expand All @@ -31,7 +32,7 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {

public:
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
const nlohmann::json &Config);
const psr::TaintConfigData &Config);
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode);
explicit LLVMTaintConfig(
TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB,
Expand Down Expand Up @@ -93,7 +94,7 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {
// --- utilities

void addAllFunctions(const LLVMProjectIRDB &IRDB,
const nlohmann::json &Config);
const TaintConfigData &Config);

// --- data members

Expand Down
10 changes: 5 additions & 5 deletions include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,22 @@
#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H

#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h"
#include "phasar/Utils/Nullable.h"

#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"

#include "nlohmann/json.hpp"

#include <map>
#include <set>
#include <type_traits>
#include <utility>

namespace psr {

enum class TaintCategory { Source, Sink, Sanitizer, None };
enum class TaintCategory { None, Source, Sink, Sanitizer };

[[nodiscard]] llvm::StringRef to_string(TaintCategory Cat) noexcept;
[[nodiscard]] TaintCategory toTaintCategory(llvm::StringRef Str) noexcept;
Expand Down Expand Up @@ -159,8 +158,9 @@ template <typename Derived> class TaintConfigBase {
//===----------------------------------------------------------------------===//
// Miscellaneous helper functions

nlohmann::json parseTaintConfig(const llvm::Twine &Path);
std::optional<nlohmann::json> parseTaintConfigOrNull(const llvm::Twine &Path);
[[nodiscard]] TaintConfigData parseTaintConfig(const llvm::Twine &Path);
[[nodiscard]] std::optional<TaintConfigData>
parseTaintConfigOrNull(const llvm::Twine &Path) noexcept;

} // namespace psr

Expand Down
46 changes: 46 additions & 0 deletions include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/******************************************************************************
* Copyright (c) 2023 Fabian Schiebel.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of LICENSE.txt.
*
* Contributors:
* Maximilian Leo Huber and others
*****************************************************************************/

#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace psr {
enum class TaintCategory;

/// Plain-data description of one function entry of a taint configuration.
///
/// Consumed by LLVMTaintConfig::addAllFunctions, which looks the function up
/// by name and tags its parameters and the users of the function with taint
/// categories.
struct FunctionData {
  FunctionData() noexcept = default;

  /// Name used to look up the function definition(s) in the IR database.
  std::string Name;
  /// Category that is applied to every user of the function. Value-initialized
  /// by default, i.e. the enumerator with underlying value 0.
  TaintCategory ReturnCat{};
  /// Parameter indices to mark as taint sources. Indices that are not smaller
  /// than the function's argument count are reported as errors and skipped.
  std::vector<std::uint32_t> SourceValues;
  /// Parameter indices to mark as taint sinks (same bounds handling as
  /// SourceValues).
  std::vector<std::uint32_t> SinkValues;
  /// Parameter indices to mark as sanitizers (same bounds handling as
  /// SourceValues).
  std::vector<std::uint32_t> SanitizerValues;
  /// When true, every parameter of the function is marked as a sink, in
  /// addition to the indices listed in SinkValues.
  bool HasAllSinkParam = false;
};

/// Plain-data description of one variable entry of a taint configuration.
///
/// Consumed by the LLVMTaintConfig constructor, which matches entries against
/// debug information of the analyzed module.
struct VariableData {
  VariableData() noexcept = default;

  /// Source line of the variable; compared against the line recorded in the
  /// local variable's debug info.
  std::size_t Line{};
  /// Variable name; compared against the debug-info name of local variables
  /// and used as a name prefix when matching getelementptr instructions.
  std::string Name;
  /// Enclosing scope; can be a function name or a struct. It is compared
  /// against the names of DWARF structure types.
  std::string Scope;
  /// Taint category assigned to the matched values. Value-initialized by
  /// default, i.e. the enumerator with underlying value 0.
  TaintCategory Cat{};
};

/// In-memory representation of a taint configuration: the function and
/// variable descriptions that LLVMTaintConfig is constructed from.
struct TaintConfigData {
/// Function entries; iterated by LLVMTaintConfig::addAllFunctions.
std::vector<FunctionData> Functions;
/// Variable entries; iterated by the LLVMTaintConfig constructor.
std::vector<VariableData> Variables;
};

} // namespace psr

#endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H
197 changes: 88 additions & 109 deletions lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
#include "phasar/PhasarLLVM/Utils/Annotation.h"
#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h"
#include "phasar/Utils/Logger.h"
#include "phasar/Utils/NlohmannLogging.h"

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"

#include <string>

namespace psr {

static llvm::SmallVector<const llvm::Function *>
Expand Down Expand Up @@ -59,9 +60,9 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) {
}

void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,
const nlohmann::json &Config) {
for (const auto &FunDesc : Config["functions"]) {
auto Name = FunDesc["name"].get<std::string>();
const TaintConfigData &Config) {
for (const auto &FunDesc : Config.Functions) {
const auto &Name = FunDesc.Name;

auto FnDefs = findAllFunctionDefs(IRDB, Name);

Expand All @@ -72,127 +73,105 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,

const auto *Fun = FnDefs[0];

// handle a function's parameters
if (FunDesc.contains("params")) {
auto Params = FunDesc["params"];
if (Params.contains("source")) {
for (unsigned Idx : Params["source"]) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
// Use 'continue' instead of 'break' to get error messages for the
// remaining parameters as well
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
}
// handle a function's source parameters
for (const auto &Idx : FunDesc.SourceValues) {
if (Idx >= Fun->arg_size()) {
llvm::errs() << "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
// Use 'continue' instead of 'break' to get error messages for the
// remaining parameters as well
continue;
}
if (Params.contains("sink")) {
for (const auto &Idx : Params["sink"]) {
if (Idx.is_number()) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
} else if (Idx.is_string()) {
const auto Sinks = Idx.get<std::string>();
if (Sinks == "all") {
for (const auto &Arg : Fun->args()) {
addTaintCategory(&Arg, TaintCategory::Sink);
}
}
}
}

addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
}
for (const auto &Idx : FunDesc.SinkValues) {
if (Idx >= Fun->arg_size()) {
llvm::errs() << "ERROR: The sink-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
if (Params.contains("sanitizer")) {
for (unsigned Idx : Params["sanitizer"]) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The source-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
}

addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
}

if (FunDesc.HasAllSinkParam) {
for (const auto &Arg : Fun->args()) {
addTaintCategory(&Arg, TaintCategory::Sink);
}
}
// handle a function's return value
if (FunDesc.contains("ret")) {
for (const auto &User : Fun->users()) {
addTaintCategory(User, FunDesc["ret"].get<std::string>());

for (const auto &Idx : FunDesc.SanitizerValues) {
if (Idx >= Fun->arg_size()) {
llvm::errs()
<< "ERROR: The sanitizer-function parameter index is out of "
"bounds: "
<< Idx << "\n";
continue;
}
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
}
// handle a function's return value
for (const auto &User : Fun->users()) {
addTaintCategory(User, FunDesc.ReturnCat);
}
}
}

LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
const nlohmann::json &Config) {
const TaintConfigData &Config) {
// handle functions
if (Config.contains("functions")) {
addAllFunctions(Code, Config);
}
addAllFunctions(Code, Config);

// handle variables
if (Config.contains("variables")) {
// scope can be a function name or a struct.
std::unordered_map<const llvm::Type *, const nlohmann::json>
StructConfigMap;

// read all struct types from config
for (const auto &VarDesc : Config["variables"]) {
llvm::DebugInfoFinder DIF;
const auto *M = Code.getModule();

DIF.processModule(*M);
for (const auto &Ty : DIF.types()) {
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
Ty->getName().equals(VarDesc["scope"].get<std::string>())) {
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
StructConfigMap.insert(
std::pair<const llvm::Type *, const nlohmann::json>(
LlvmStructTy, VarDesc));
}
// scope can be a function name or a struct.
std::unordered_map<const llvm::Type *, const std::string> StructConfigMap;

// read all struct types from config
size_t Counter = 0;
for (const auto &VarDesc : Config.Variables) {
llvm::DebugInfoFinder DIF;
const auto *M = Code.getModule();

DIF.processModule(*M);
for (const auto &Ty : DIF.types()) {
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
Ty->getName().equals(VarDesc.Scope)) {
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
StructConfigMap.insert(
std::pair<const llvm::Type *, const std::string>(LlvmStructTy,
VarDesc.Name));
}
}
DIF.reset();
}

// add corresponding Allocas or getElementPtr instructions to the taint
// category
for (const auto &VarDesc : Config["variables"]) {
for (const auto &Fun : Code.getAllFunctions()) {
for (const auto &I : llvm::instructions(Fun)) {
if (const auto *DbgDeclare =
llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
// matching line number with for Allocas
if (LocalVar->getName().equals(
VarDesc["name"].get<std::string>()) &&
LocalVar->getLine() == VarDesc["line"].get<unsigned int>()) {
addTaintCategory(DbgDeclare->getAddress(),
VarDesc["cat"].get<std::string>());
}
} else if (!StructConfigMap.empty()) {
// Ignorning line numbers for getElementPtr instructions
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
const auto *StType = llvm::dyn_cast<llvm::StructType>(
Gep->getPointerOperandType()->getPointerElementType());
if (StType && StructConfigMap.count(StType)) {
const auto VarDesc = StructConfigMap.at(StType);
auto VarName = VarDesc["name"].get<std::string>();
// using substr to cover the edge case in which same variable
// name is present as a local variable and also as a struct
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
addTaintCategory(Gep, VarDesc["cat"].get<std::string>());
}
DIF.reset();
}
// add corresponding Allocas or getElementPtr instructions to the taint
// category
for (const auto &VarDesc : Config.Variables) {
for (const auto &Fun : Code.getAllFunctions()) {
for (const auto &I : llvm::instructions(Fun)) {
if (const auto *DbgDeclare = llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
// matching name and line number for Allocas
if (LocalVar->getName().equals(VarDesc.Name) &&
LocalVar->getLine() == VarDesc.Line) {
addTaintCategory(DbgDeclare->getAddress(), VarDesc.Cat);
}
} else if (!StructConfigMap.empty()) {
// Ignoring line numbers for getElementPtr instructions
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
const auto *StType = llvm::dyn_cast<llvm::StructType>(
Gep->getPointerOperandType()->getPointerElementType());
if (StType && StructConfigMap.count(StType)) {
auto VarName = StructConfigMap.at(StType);
// using substr to cover the edge case in which same variable
// name is present as a local variable and also as a struct
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
addTaintCategory(Gep, VarDesc.Cat);
}
}
}
Expand Down
Loading
Loading