Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DC verification algorithm #444

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
5 changes: 5 additions & 0 deletions .github/composite-actions/download-libraries/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ runs:
sudo ./b2 install --prefix=/usr
shell: bash
if: inputs.install-boost != 'false'
- name: Download frozen
uses: ./.github/composite-actions/download-library
with:
directory: frozen
download-command: git clone https://github.com/serge-sans-paille/frozen.git --depth 1

# Uncomment this if we set up our own git lfs server
# - name: Install git-lfs
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ include_directories(
"src/core/config"
)

include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/" "lib/emhash" "lib/atomicbitvector/include")
include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/" "lib/emhash" "lib/atomicbitvector/include" "lib/frozen/include")

# adding submodules
if (COMPILE_TESTS)
Expand Down
3 changes: 3 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ fi
if [[ ! -d "atomicbitvector" ]] ; then
git clone https://github.com/ekg/atomicbitvector.git --depth 1
fi
# Fetch the frozen library with a shallow clone unless a "frozen" directory already exists here
if [[ ! -d "frozen" ]] ; then
git clone https://github.com/serge-sans-paille/frozen.git --depth 1
fi

if [[ $NO_TESTS == true ]]; then
PREFIX="$PREFIX -D COMPILE_TESTS=OFF"
Expand Down
20 changes: 20 additions & 0 deletions examples/basic/verifying_dc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import desbordante as db

# The denial constraint below states that if two persons
# live in the same state, the one earning a lower
# salary must not have a higher tax rate
DC = "!(s.State == t.State and s.Salary < t.Salary and s.FedTaxRate > t.FedTaxRate)"

TABLE = "test_input_data/TestDC1.csv"

# Creating a verifier and loading data into the algorithm
# NOTE(review): the tuple is presumably (path, separator, has_header) - confirm against load_data
verificator = db.dc_verification.algorithms.Default()
verificator.load_data(table=(TABLE, ',', True))

# Algorithm execution
verificator.execute(denial_constraint=DC)

# Obtaining the result
result: bool = verificator.dc_holds()

print("DC " + DC + " holds: " + str(result))
91 changes: 91 additions & 0 deletions src/core/algorithms/dc/model/column_operand.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#pragma once

#include <cstdlib>
#include <exception>
xJoskiy marked this conversation as resolved.
Show resolved Hide resolved
#include <memory>
#include <string>
#include <vector>

#include <boost/functional/hash.hpp>

#include "model/table/column.h"
#include "model/table/relation_data.h"

namespace algos::dc {

// @brief Represents a column operand of a predicate in a Denial Constraint (DC).
//
// DCs involve comparisons between pairs of rows within a dataset.
// A typical DC example, derived from a Functional Dependency such as A -> B,
// is expressed as: `forall t, s in R, not (t.A == s.A and t.B != s.B)`
// This denotes that for any pair of rows in the relation, it should not be the case
// that while the values in column "A" are equal, the values in column "B" are unequal.
//
// A predicate in this context (e.g., t.A == s.A) comprises three elements to be fully
// represented: the column operand from the first tuple ("t.A"), the comparison operator
// ("=="), and the column operand from the second tuple ("s.A"). The `ColumnOperand` class
// encapsulates the column operand part of a predicate, such as "t.A" or "s.A".
//
// The class distinguishes between operands derived from the first tuple (t) and those
// from the second tuple (s) using a boolean flag `tuple_`, where `true` indicates an
// operand from the first tuple (t), and `false` indicates an operand from the second
// tuple (s).

class ColumnOperand {
private:
Column const* column_;
bool tuple_;

public:
ColumnOperand(Column const* column, bool tuple) : column_(column), tuple_(tuple) {}

ColumnOperand() = default;

// For conversion from "t.ColumnPosition" or "t.ColumnName"
ColumnOperand(std::string operand, RelationalSchema const& schema) {
if (operand[0] != 't' and operand[0] != 's') throw std::logic_error("Unknown tuple name");

tuple_ = operand[0] == 't';
std::string name(operand.begin() + 2, operand.end());
std::vector<std::unique_ptr<Column>> const& cols = schema.GetColumns();
if (cols[0]->GetName() != "") { // Has header
for (std::unique_ptr<Column> const& col : cols) {
if (name == col->GetName()) {
column_ = col.get();
return;
}
}
}

try {
std::string str_ind(operand.begin() + 2, operand.end());
model::ColumnIndex ind = std::stoi(str_ind);
column_ = cols[ind].get();
return;
} catch (std::exception const& e) {
throw std::logic_error("Unknown column index or name");
}
}

bool operator==(ColumnOperand const& rhs) const {
return column_ == rhs.column_ && tuple_ == rhs.tuple_;
}

bool operator!=(ColumnOperand const& rhs) const {
return !(*this == rhs);
}

Column const* GetColumn() const {
return column_;
}

bool GetTuple() const {
return tuple_;
}

std::string ToString() const {
return (tuple_ ? "t." : "s.") + column_->GetName();
}
};

} // namespace algos::dc
87 changes: 87 additions & 0 deletions src/core/algorithms/dc/model/component.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "component.h"

#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>

#include "model/types/builtin.h"
#include "operator.h"
#include "types.h"

namespace algos::dc {

namespace mo = model;

// Compares two values through the numeric-type interface and reports whether the
// comparison outcome equals `res`.
//
// @param l_val, lhs_type left value and its type
// @param r_val, rhs_type right value and its type
// @param res             the comparison outcome to test for
// @return true iff comparing l_val with r_val yields `res`
// @throws std::logic_error if either type is not a model::INumericType
bool Component::CompareNumeric(std::byte const* l_val, mo::Type const* lhs_type,
                               std::byte const* r_val, mo::Type const* rhs_type,
                               mo::CompareResult res) const {
    auto const* l_type = dynamic_cast<mo::INumericType const*>(lhs_type);
    auto const* r_type = dynamic_cast<mo::INumericType const*>(rhs_type);

    // dynamic_cast yields nullptr for a non-numeric type; fail loudly instead of
    // dereferencing a null pointer
    if (l_type == nullptr or r_type == nullptr)
        throw std::logic_error("CompareNumeric called with a non-numeric type");

    return l_type->CompareNumeric(l_val, l_type, r_val, r_type) == res;
}

std::string Component::ToString() const {
if (val_type_ == ValType::kPlusInf) return "+Inf";
if (val_type_ == ValType::kMinusInf) return "-Inf";

mo::TypeId type_id = type_->GetTypeId();
switch (type_id) {
case mo::TypeId::kInt:
return std::to_string(mo::Type::GetValue<mo::Int>(val_));
case mo::TypeId::kDouble:
return std::to_string(mo::Type::GetValue<mo::Double>(val_));
case mo::TypeId::kString:
return mo::Type::GetValue<mo::String>(val_);
default:
assert(false);
__builtin_unreachable();
}
}

// Strict "less than" between two components.
//
// Components of different categories are ordered by ValType declaration order
// (-Inf, finite, +Inf). Two components of the same infinite category are equal
// (see operator==), hence never strictly less than each other. Finite values are
// compared through the numeric interface when the type is numeric, and through
// Type::Compare otherwise.
bool Component::operator<(Component const& rhs) const {
    assert(type_->IsMetrizable() and rhs.type_->IsMetrizable());
    assert(type_->IsNumeric() == rhs.type_->IsNumeric());  // both or neither

    if (val_type_ != rhs.val_type_) return val_type_ < rhs.val_type_;

    // Same category and not finite: both are -Inf or both are +Inf, i.e. equal.
    // Returning true here would make x < x hold and break operator>= (defined as !(<)).
    if (val_type_ != ValType::kFinite) return false;

    if (type_->IsNumeric())
        return CompareNumeric(val_, type_, rhs.val_, rhs.type_, mo::CompareResult::kLess);

    return type_->Compare(val_, rhs.val_) == mo::CompareResult::kLess;
}

// Equality of two components: the categories must match; two infinities of the
// same category are equal; finite values are compared by value.
bool Component::operator==(Component const& rhs) const {
    // NOTE(review): assert is compiled out when NDEBUG is defined, so these checks only
    // run in debug builds; a reviewer suggested using exceptions here if they are needed.
    assert(type_->IsMetrizable() and rhs.type_->IsMetrizable());
    assert(type_->IsNumeric() == rhs.type_->IsNumeric());  // both or neither

    bool const same_category = (val_type_ == rhs.val_type_);
    if (!same_category) return false;

    bool const is_finite = (val_type_ == ValType::kFinite);
    if (!is_finite) return true;

    return type_->IsNumeric()
                   ? CompareNumeric(val_, type_, rhs.val_, rhs.type_, mo::CompareResult::kEqual)
                   : type_->Compare(val_, rhs.val_) == mo::CompareResult::kEqual;
}

// Applies the comparison described by `op` to `lhs` and `rhs` and returns its truth value.
// An operator type outside the six handled ones is treated as unreachable.
bool Component::Eval(Component const& lhs, Component const& rhs, Operator const& op) {
    switch (op.GetType()) {
        case OperatorType::kEqual:
            return lhs == rhs;
        case OperatorType::kUnequal:
            return lhs != rhs;
        case OperatorType::kLess:
            return lhs < rhs;
        case OperatorType::kLessEqual:
            return lhs <= rhs;
        case OperatorType::kGreater:
            return lhs > rhs;
        case OperatorType::kGreaterEqual:
            return lhs >= rhs;
        default:
            break;
    }

    assert(false);
    __builtin_unreachable();
}

} // namespace algos::dc
74 changes: 74 additions & 0 deletions src/core/algorithms/dc/model/component.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#pragma once

#include <cstddef>
#include <string>

#include "model/types/builtin.h"
#include "operator.h"
#include "types.h"

namespace algos::dc {

// Category of a component value: negative infinity, an ordinary finite value, or
// positive infinity. The enumerator order matters: Component::operator< compares
// ValType values directly, so kMinusInf < kFinite < kPlusInf must be preserved.
enum class ValType { kMinusInf, kFinite, kPlusInf };

// @brief Component of a k-dimensional point
class Component {
private:
    // Non-owning pointer to the raw value bytes; interpreted according to type_.
    std::byte const* val_;
    // Non-owning pointer to the type used to compare and format val_.
    model::Type const* type_;
    // Whether the component is finite or one of the two infinities.
    ValType val_type_;

    // Compares two values via the numeric-type interface and tests the outcome against `res`.
    bool CompareNumeric(std::byte const* l_val, model::Type const* lhs_type, std::byte const* r_val,
                        model::Type const* rhs_type, model::CompareResult res) const;

public:
    // Creates a finite component with no value and no type attached.
    Component() noexcept : val_(nullptr), type_(nullptr), val_type_(ValType::kFinite) {}

    Component(std::byte const* value, model::Type const* type,
              ValType val_type = ValType::kFinite) noexcept
        : val_(value), type_(type), val_type_(val_type) {}

    std::string ToString() const;

    // Evaluates `lhs op rhs` for the comparison operator `op`.
    static bool Eval(Component const& lhs, Component const& rhs, Operator const& op);

    bool operator<(Component const& rhs) const;

    bool operator==(Component const& rhs) const;

    // The remaining comparisons are derived from operator< and operator==.
    bool operator!=(Component const& rhs) const {
        return !(*this == rhs);
    }

    bool operator<=(Component const& rhs) const {
        return *this < rhs or *this == rhs;
    }

    bool operator>(Component const& rhs) const {
        return !(*this <= rhs);
    }

    bool operator>=(Component const& rhs) const {
        return !(*this < rhs);
    }

    // Exchanges all three fields with `rhs`; swapping pointers and an enum cannot throw.
    void Swap(Component& rhs) noexcept {
        std::swap(val_, rhs.val_);
        std::swap(type_, rhs.type_);
        std::swap(val_type_, rhs.val_type_);
    }

    // Mutable access to the value category (lets callers change it in place).
    ValType& GetValType() {
        return val_type_;
    }

    // Read-only access to the value category for const components.
    ValType GetValType() const {
        return val_type_;
    }

    model::Type const* GetType() const {
        return type_;
    }

    // Const-qualified so it can be called on const components; existing callers are unaffected.
    std::byte const* GetVal() const {
        return val_;
    }
};

} // namespace algos::dc
86 changes: 86 additions & 0 deletions src/core/algorithms/dc/model/dc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#pragma once

#include <algorithm>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "model/table/column.h"
#include "model/table/vertical.h"
#include "predicate.h"

namespace algos {

namespace dc {

// Classification of a denial constraint by the shape of its predicates.
// "s" and "t" denote the two tuple variables, "A" and "B" columns, "op" a comparison operator.
// TODO: Add constant dc
enum class DCType {
kOneTuple = 0,  // Each predicate is of form s.A op s.B or t.A op t.B
kTwoTuples,     // Each predicate is of form s.A op t.B
kMixed,         // Both kTwoTuples and kOneTuple predicates are in DC
kAllEquality,   // All predicates are of form s.A == t.A
kOneInequality  // DC is kAllEquality except one predicate of form s.A op t.B
};

class DC {
std::vector<Predicate> predicates_;

public:
DC(std::vector<Predicate>&& predicates) : predicates_(std::move(predicates)) {};
DC(std::vector<Predicate> const& predicates) : predicates_(predicates) {};
DC() = default;

template <class Iter>
DC(Iter first, Iter last) {
while (first != last) predicates_.push_back(*(first++));
}

// returns unique columns indices from each Predicate which satisfy the given predicate
template <class Pred>
std::vector<uint> GetColumnIndicesWithOperator(Pred check) const {
std::set<uint> res;
for (Predicate const& pred : predicates_) {
if (check(pred.GetOperator())) {
Column::IndexType left_ind = pred.GetLeftOperand().GetColumn()->GetIndex();
Column::IndexType right_ind = pred.GetRightOperand().GetColumn()->GetIndex();
res.insert({left_ind, right_ind});
}
}

return std::vector(res.begin(), res.end());
}

std::vector<uint> GetColumnIndices() const {
return GetColumnIndicesWithOperator([](Operator) { return true; });
}

// returns all predicates satisfying the given predicate
template <class Pred>
std::vector<Predicate> GetPredicates(Pred check) const {
std::vector<Predicate> res;
std::copy_if(predicates_.begin(), predicates_.end(), std::back_inserter(res), check);

return res;
}

std::string DCToString() const {
std::string const k_not = "!";
std::string const k_and = " and ";

std::string res;
for (size_t i = 0; i < predicates_.size(); i++) {
res += k_and + predicates_[i].ToString();
}

return k_not + "(" + res + ")";
}

std::vector<Predicate> const& GetPredicates() const {
return predicates_;
};
};

} // namespace dc

} // namespace algos
Loading
Loading