Skip to content

Commit

Permalink
Add DC verifier python bindings and basic example
Browse files Browse the repository at this point in the history
  • Loading branch information
xJoskiy committed Oct 29, 2024
1 parent 5413efa commit 88a8cde
Show file tree
Hide file tree
Showing 14 changed files with 141 additions and 12 deletions.
20 changes: 20 additions & 0 deletions examples/basic/verifying_dc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import desbordante as db

# The denial constraint below forbids the following situation: two people
# live in the same state, yet the one with the lower salary has the
# higher federal tax rate.
DC = "!(s.State == t.State and s.Salary < t.Salary and s.FedTaxRate > t.FedTaxRate)"

TABLE = "test_input_data/TestDC1.csv"

# Create the verifier and load the dataset into the algorithm.
# The table is passed as a (path, separator, flag) tuple; the flag is
# presumably "file has a header row" -- TODO confirm.
verificator = db.dc_verification.algorithms.Default()
verificator.load_data(table=(TABLE, ',', True))

# Run the verification for the constraint defined above.
verificator.execute(denial_constraint=DC)

# Fetch the verdict and report it.
result: bool = verificator.dc_holds()
print(f"DC {DC} holds: {result}")
47 changes: 47 additions & 0 deletions examples/expert/data_cleaning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import desbordante as db
import networkx as nx
import matplotlib.pyplot as plt
import time

def verify_dc(verificator, dc, iterations=1):
    """Run DC verification, then print the verdict and timing information.

    Args:
        verificator: object exposing ``execute(denial_constraint=...)`` and
            ``dc_holds()``; data is expected to be loaded already.
        dc: denial constraint string forwarded to ``execute``.
        iterations: number of times ``execute`` is run. The reported
            algorithm time is the average over these runs. Defaults to 1.

    Raises:
        ValueError: if ``iterations`` is less than 1 (the average would be
            undefined).
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # perf_counter() is a monotonic, high-resolution clock, so measured
    # intervals are not distorted by system clock adjustments.
    total_start = time.perf_counter()
    print("Algo execution start")

    algo_start = time.perf_counter()
    for _ in range(iterations):
        verificator.execute(denial_constraint=dc)
    algo_time = (time.perf_counter() - algo_start) / iterations

    print("Algo execution end")

    # The total time additionally covers fetching the verdict.
    result = verificator.dc_holds()
    total_time = time.perf_counter() - total_start

    print("DC " + dc + " holds: " + str(result))
    print("Algo average time elapsed: " + str(algo_time))
    print("Total time elapsed: " + str(total_time))


def main():
    """Verify a denial constraint on TestDC1.csv and draw its violations.

    The constraint refers to columns by index. After verification, every
    item returned by ``get_violations()`` is added as an edge of an
    undirected graph, which is printed and then displayed with matplotlib.

    NOTE(review): each violation is unpacked into ``Graph.add_edge``, so it
    presumably is a pair of row identifiers -- confirm. Also confirm that
    ``get_violations`` is exposed by the Python bindings; the binding code
    shown alongside this example only registers ``dc_holds``.
    """
    # Relative path: resolved against the current working directory.
    csv_path = 'TestDC1.csv'
    constraint = "!(t.0 == s.0 and t.1 > s.1 and t.2 < s.2)"

    verificator = db.dc_verification.algorithms.Default()
    verificator.load_data(table=(csv_path, ',', True))

    verify_dc(verificator, constraint)

    # Build the violation graph: one edge per reported violation.
    graph = nx.Graph()
    for violation in verificator.get_violations():
        graph.add_edge(*violation)
    print(graph)

    nx.draw(graph, with_labels=True)
    plt.show()


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion src/core/algorithms/dc/model/point.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class Point {
}

size_t GetIndex() const {
return index;
return index_;
}

Component& operator[](size_t i) {
Expand Down
15 changes: 8 additions & 7 deletions src/core/algorithms/dc/verifier/dc_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,9 @@ bool DCVerifier::VerifyMixed(const DC& dc) {

ProcessMixed(s_predicates, s_tree, t_tree, mixed_dc, i, all_cols, res);
ProcessMixed(t_predicates, t_tree, s_tree, mixed_dc, i, all_cols, res);
if (!res) return false;
}

return true;
return res;
}

void DCVerifier::ProcessMixed(std::vector<Predicate> const& preds, tree& insert_tree,
Expand Down Expand Up @@ -174,9 +173,8 @@ bool DCVerifier::VerifyTwoTuples(const DC& dc) {
auto [box, inv_box] = SearchRanges(dc, tuple);
std::vector<point> search_res = hash[key].QuerySearch(box);
std::vector<point> inv_search_res = hash[key].QuerySearch(inv_box);
if (!search_res.empty() or !inv_search_res.empty()) {
return false;
}

if (!search_res.empty() or !inv_search_res.empty()) return false;

hash[key].Insert(MakePoint(tuple, ineq_cols, i + index_offset_));
}
Expand All @@ -185,17 +183,20 @@ bool DCVerifier::VerifyTwoTuples(const DC& dc) {
}

bool DCVerifier::VerifyAllEquality(const DC& dc) {
bool res = true;
std::unordered_set<size_t> res_tuples;
std::vector<uint> const eq_cols = dc.GetColumnIndices();
for (size_t i = 0; i < data_[0].GetNumRows(); i++) {
if (ContainsNullOrEmpty(eq_cols, i)) continue;
std::vector<std::byte const*> tuple = GetTuple(i);
size_t key = HashTuple(tuple, eq_cols);
if (res_tuples.find(key) != res_tuples.end()) return false;
if (res_tuples.find(key) != res_tuples.end()) {
res = false;
}
res_tuples.insert(key);
}

return true;
return res;
}

bool DCVerifier::VerifyOneInequality(const DC& dc) {
Expand Down
6 changes: 5 additions & 1 deletion src/python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "bind_main_classes.h"
#include "cfd/bind_cfd.h"
#include "data/bind_data_types.h"
#include "dc/bind_dc_verification.h"
#include "dd/bind_split.h"
#include "dynamic/bind_dynamic_fd_verification.h"
#include "fd/bind_fd.h"
Expand Down Expand Up @@ -38,12 +39,15 @@ PYBIND11_MODULE(desbordante, module, pybind11::mod_gil_not_used()) {
el::Loggers::reconfigureAllLoggers(conf);
}

// clang-format off
for (auto bind_func :
{BindMainClasses, BindDataTypes, BindFd, BindCfd, BindAr, BindUcc, BindAc, BindOd, BindNd,
BindFdVerification, BindMfdVerification, BindUccVerification, BindStatistics, BindInd,
BindGfdVerification, BindSplit, BindDynamicFdVerification, BindNdVerification, BindSFD}) {
BindGfdVerification, BindSplit, BindDynamicFdVerification, BindNdVerification, BindSFD,
BindDCVerification}) {
bind_func(module);
}
// clang-format on
}

} // namespace python_bindings
17 changes: 17 additions & 0 deletions src/python_bindings/dc/bind_dc_verification.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "bind_dc_verification.h"

#include "algorithms/dc/verifier/dc_verifier.h"
#include "py_util/bind_primitive.h"

namespace python_bindings {

namespace py = pybind11;

/// Registers the denial constraint verification bindings.
///
/// Creates the `dc_verification` submodule of `main_module`, registers
/// `algos::DCVerifier` in it under the name "DCVerification" through
/// `BindPrimitiveNoBase`, and exposes `DCVerifier::DCHolds` to Python as
/// `dc_holds`.
void BindDCVerification(py::module_& main_module) {
    using algos::DCVerifier;

    py::module_ dc_module = main_module.def_submodule("dc_verification");

    auto verifier_class = BindPrimitiveNoBase<DCVerifier>(dc_module, "DCVerification");
    verifier_class.def("dc_holds", &DCVerifier::DCHolds);
}

} // namespace python_bindings
7 changes: 7 additions & 0 deletions src/python_bindings/dc/bind_dc_verification.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once

#include <pybind11/pybind11.h>

namespace python_bindings {
/// Registers the denial constraint (DC) verification bindings on
/// `main_module`. The definition in bind_dc_verification.cpp adds a
/// `dc_verification` submodule and exposes `algos::DCVerifier` there
/// with a `dc_holds` method.
void BindDCVerification(pybind11::module_& main_module);
} // namespace python_bindings
2 changes: 1 addition & 1 deletion src/python_bindings/py_util/get_py_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ py::tuple GetPyType(std::type_index type_index) {
PyTypePair<std::filesystem::path, kPyStr>,
PyTypePair<std::vector<std::filesystem::path>, kPyList, kPyStr>,
PyTypePair<std::unordered_set<size_t>, kPySet, kPyInt>,
};
PyTypePair<std::string, kPyStr>};
return type_map.at(type_index)();
}

Expand Down
2 changes: 1 addition & 1 deletion src/python_bindings/py_util/py_to_any.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ std::unordered_map<std::type_index, ConvFunc> const kConverters{
kNormalConvPair<std::filesystem::path>,
kNormalConvPair<std::vector<std::filesystem::path>>,
kNormalConvPair<std::unordered_set<size_t>>,
};
kNormalConvPair<std::string>};

} // namespace

Expand Down
2 changes: 2 additions & 0 deletions src/tests/all_csv_configs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,6 @@ CSVConfig const kLineItem = CreateCsvConfig("LineItem.csv", '|', true);
CSVConfig const kTmpDC = CreateCsvConfig("tmp_dc.csv", ',', true);
CSVConfig const kTestDC = CreateCsvConfig("TestDC.csv", ',', true);
CSVConfig const kTestDC1 = CreateCsvConfig("TestDC1.csv", ',', true);
CSVConfig const kTestDC2 = CreateCsvConfig("TestDC2.csv", ',', true);
CSVConfig const kTestDC3 = CreateCsvConfig("TestDC3.csv", ',', true);
} // namespace tests
2 changes: 2 additions & 0 deletions src/tests/all_csv_configs.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,6 @@ extern CSVConfig const kLineItem;
extern CSVConfig const kTmpDC;
extern CSVConfig const kTestDC;
extern CSVConfig const kTestDC1;
extern CSVConfig const kTestDC2;
extern CSVConfig const kTestDC3;
} // namespace tests
5 changes: 4 additions & 1 deletion src/tests/test_dc_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ TEST(TestDCVerifier, TestUCC) {
auto params = GetParamMap(kTestDC, dc_string);
std::unique_ptr<DCVerifier> dc_verifier = algos::CreateAndLoadAlgorithm<DCVerifier>(params);
dc_verifier->Execute();

size_t expected_size = 0;
std::vector<Point> res = dc_verifier->GetViolations();
EXPECT_EQ(expected_size, res.size());
EXPECT_TRUE(dc_verifier->DCHolds());
}

Expand Down Expand Up @@ -62,7 +66,6 @@ TEST(TestDCVerifier, TestMixedNamesColNums) {
}

TEST(TestDCVerifier, TestNoHeader) {
CSVConfig const kTestAR{"TestAR.csv", ',', false};
std::string dc_string = "!(s.0 == t.1 and s.1 == t.2 and s.2 == t.3)";
auto params = GetParamMap(kBernoulliRelation, dc_string);
std::unique_ptr<DCVerifier> dc_verifier = algos::CreateAndLoadAlgorithm<DCVerifier>(params);
Expand Down
12 changes: 12 additions & 0 deletions test_input_data/TestDC2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
State,Salary,FedTaxRate
NewYork,3000,0.2
NewYork,4000,0.25
NewYork,5000,0.3
Wisconsin,5000,0.15
Wisconsin,6000,0.2
Wisconsin,4000,0.1
Wisconsin,4500,0.12
Texas,5000,0.05
Texas,1000,0.15
Texas,2000,0.25
Texas,3000,0.3
14 changes: 14 additions & 0 deletions test_input_data/TestDC3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
NewYork,3000,0.2
NewYork,4000,0.25
NewYork,5000,0.3
NewYork,10,10
Wisconsin,5000,0.15
Wisconsin,6000,0.2
Wisconsin,20,20
Wisconsin,4000,0.1
Texas,5000,0.05
Texas,1000,0.15
Texas,2000,0.25
Texas,3000,0.3
Texas,3000,0.31
Texas,30,30

0 comments on commit 88a8cde

Please sign in to comment.