From 9b7fad7281acec22b9b6c74a30df227ef586e0ee Mon Sep 17 00:00:00 2001 From: DaniilGoncharov Date: Mon, 22 Apr 2024 17:56:08 +0300 Subject: [PATCH] Add script for testing examples Add script for testing examples (without gfd). Bindings now return sorted list of fds (only for algos inherited from FDAlgorithm). Update README to the current output --- README.md | 26 +-- README_PYPI.md | 18 +-- examples/afd_multiple_error_thresholds.py | 8 +- examples/comparison_pfd_vs_afd.py | 7 +- examples/dedupe.py | 1 + examples/mining_set_od_2.py | 2 +- examples/testing/inputs/dedupe_input.txt | 13 ++ .../afd_multiple_error_thresholds_output.txt | 4 + .../outputs/algebraic_constraints_output.txt | 19 +++ .../outputs/anomaly_detection_output.txt | 44 +++++ .../outputs/comparison_pfd_vs_afd_output.txt | 7 + .../testing/outputs/data_stats_output.txt | 152 ++++++++++++++++++ examples/testing/outputs/dedupe_output.txt | 92 +++++++++++ .../testing/outputs/mine_typos_output.txt | 44 +++++ .../testing/outputs/mining_afd_output.txt | 4 + .../testing/outputs/mining_cfd_output.txt | 124 ++++++++++++++ examples/testing/outputs/mining_fd_output.txt | 8 + .../testing/outputs/mining_ind_output.txt | 34 ++++ .../testing/outputs/mining_list_od_output.txt | 49 ++++++ .../testing/outputs/mining_pfd_output.txt | 3 + .../outputs/mining_set_od_1_output.txt | 108 +++++++++++++ .../outputs/mining_set_od_2_output.txt | 134 +++++++++++++++ .../testing/outputs/verifying_aucc_output.txt | 64 ++++++++ .../outputs/verifying_fd_afd_output.txt | 90 +++++++++++ .../testing/outputs/verifying_mfd_output.txt | 1 + .../testing/outputs/verifying_ucc_output.txt | 18 +++ examples/testing/test_examples.sh | 40 +++++ src/core/algorithms/fd/fd_algorithm.cpp | 14 ++ src/core/algorithms/fd/fd_algorithm.h | 2 + src/core/model/table/vertical.cpp | 8 - src/core/model/table/vertical.h | 4 +- src/python_bindings/fd/bind_fd.cpp | 3 +- 32 files changed, 1104 insertions(+), 41 deletions(-) create mode 100644 
examples/testing/inputs/dedupe_input.txt create mode 100644 examples/testing/outputs/afd_multiple_error_thresholds_output.txt create mode 100644 examples/testing/outputs/algebraic_constraints_output.txt create mode 100644 examples/testing/outputs/anomaly_detection_output.txt create mode 100644 examples/testing/outputs/comparison_pfd_vs_afd_output.txt create mode 100644 examples/testing/outputs/data_stats_output.txt create mode 100644 examples/testing/outputs/dedupe_output.txt create mode 100644 examples/testing/outputs/mine_typos_output.txt create mode 100644 examples/testing/outputs/mining_afd_output.txt create mode 100644 examples/testing/outputs/mining_cfd_output.txt create mode 100644 examples/testing/outputs/mining_fd_output.txt create mode 100644 examples/testing/outputs/mining_ind_output.txt create mode 100644 examples/testing/outputs/mining_list_od_output.txt create mode 100644 examples/testing/outputs/mining_pfd_output.txt create mode 100644 examples/testing/outputs/mining_set_od_1_output.txt create mode 100644 examples/testing/outputs/mining_set_od_2_output.txt create mode 100644 examples/testing/outputs/verifying_aucc_output.txt create mode 100644 examples/testing/outputs/verifying_fd_afd_output.txt create mode 100644 examples/testing/outputs/verifying_mfd_output.txt create mode 100644 examples/testing/outputs/verifying_ucc_output.txt create mode 100644 examples/testing/test_examples.sh diff --git a/README.md b/README.md index 568826d51..c96d825bf 100644 --- a/README.md +++ b/README.md @@ -54,13 +54,13 @@ python3 cli.py --task=fd --table=../examples/datasets/university_fd.csv , True ``` ```text -[Course Classroom] -> Professor -[Classroom Semester] -> Professor -[Classroom Semester] -> Course [Professor] -> Course -[Professor Semester] -> Classroom +[Course Classroom] -> Professor [Course Semester] -> Classroom [Course Semester] -> Professor +[Classroom Semester] -> Course +[Classroom Semester] -> Professor +[Professor Semester] -> Classroom ``` 2) 
Discover all approximate functional dependencies with error less than or equal to 0.1 in a table represented by a .csv file that uses a comma as the separator and has a header row. In this example the default AFD discovery algorithm (Pyro) is used. @@ -114,13 +114,13 @@ for fd in result: ``` ```text FDs: -[Course Classroom] -> Professor -[Classroom Semester] -> Professor -[Classroom Semester] -> Course [Professor] -> Course -[Professor Semester] -> Classroom +[Course Classroom] -> Professor [Course Semester] -> Classroom [Course Semester] -> Professor +[Classroom Semester] -> Course +[Classroom Semester] -> Professor +[Professor Semester] -> Classroom ``` 2) Discover all approximate functional dependencies with error less than or equal to 0.1 in a table represented by a .csv file that uses a comma as the separator and has a header row. In this example the AFD discovery algorithm Pyro is used. @@ -141,8 +141,8 @@ for fd in result: ``` ```text AFDs: -[Id] -> Price [Id] -> ProductName +[Id] -> Price [ProductName] -> Price ``` @@ -178,16 +178,16 @@ MFD holds >>> pyro.load_data(table=df) >>> pyro.execute(error=0.0) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[0 1 2] -> 4, [0 2 3] -> 4, [0 1 3] -> 4, [1 2 3] -> 4] +[[0 1 2] -> 4, [0 1 3] -> 4, [0 2 3] -> 4, [1 2 3] -> 4] >>> pyro.execute(error=0.1) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 0, [2] -> 3, [2] -> 1, [0] -> 2, [3] -> 0, [0] -> 3, [0] -> 1, [1] -> 3, [1] -> 0, [3] -> 2, [3] -> 1, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] >>> pyro.execute(error=0.2) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 0, [0] -> 2, [3] -> 2, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4, [3] -> 0, [1] -> 0, [2] -> 3, [2] -> 1, [0] -> 3, [0] -> 1, [1] -> 3, [3] -> 1] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, 
[1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] >>> pyro.execute(error=0.3) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 1, [0] -> 2, [2] -> 0, [2] -> 3, [0] -> 1, [3] -> 2, [3] -> 1, [1] -> 2, [3] -> 0, [0] -> 3, [4] -> 1, [1] -> 0, [1] -> 3, [4] -> 2, [4] -> 3, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4, [4] -> 1, [4] -> 2, [4] -> 3] ``` ## Web interface diff --git a/README_PYPI.md b/README_PYPI.md index 96c3454f2..e7b7f7b92 100644 --- a/README_PYPI.md +++ b/README_PYPI.md @@ -73,13 +73,13 @@ for fd in result: ```text FDs: -[Course Classroom] -> Professor -[Classroom Semester] -> Professor -[Classroom Semester] -> Course [Professor] -> Course -[Professor Semester] -> Classroom +[Course Classroom] -> Professor [Course Semester] -> Classroom [Course Semester] -> Professor +[Classroom Semester] -> Course +[Classroom Semester] -> Professor +[Professor Semester] -> Classroom ``` 2) Discover all approximate functional dependencies with error less than or equal to 0.1 in a table represented by a @@ -103,8 +103,8 @@ for fd in result: ```text AFDs: -[Id] -> Price [Id] -> ProductName +[Id] -> Price [ProductName] -> Price ``` @@ -145,16 +145,16 @@ MFD holds >>> pyro.load_data(table=df) >>> pyro.execute(error=0.0) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[0 1 2] -> 4, [0 2 3] -> 4, [0 1 3] -> 4, [1 2 3] -> 4] +[[0 1 2] -> 4, [0 1 3] -> 4, [0 2 3] -> 4, [1 2 3] -> 4] >>> pyro.execute(error=0.1) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 0, [2] -> 3, [2] -> 1, [0] -> 2, [3] -> 0, [0] -> 3, [0] -> 1, [1] -> 3, [1] -> 0, [3] -> 2, [3] -> 1, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, 
[3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] >>> pyro.execute(error=0.2) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 0, [0] -> 2, [3] -> 2, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4, [3] -> 0, [1] -> 0, [2] -> 3, [2] -> 1, [0] -> 3, [0] -> 1, [1] -> 3, [3] -> 1] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] >>> pyro.execute(error=0.3) >>> print(f'[{", ".join(map(str, pyro.get_fds()))}]') -[[2] -> 1, [0] -> 2, [2] -> 0, [2] -> 3, [0] -> 1, [3] -> 2, [3] -> 1, [1] -> 2, [3] -> 0, [0] -> 3, [4] -> 1, [1] -> 0, [1] -> 3, [4] -> 2, [4] -> 3, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4, [4] -> 1, [4] -> 2, [4] -> 3] ``` More examples can be found in the [Desbordante repository](https://github.com/Desbordante/desbordante-core/tree/main/examples) on GitHub. 
diff --git a/examples/afd_multiple_error_thresholds.py b/examples/afd_multiple_error_thresholds.py index c6d6edcf6..c02520434 100644 --- a/examples/afd_multiple_error_thresholds.py +++ b/examples/afd_multiple_error_thresholds.py @@ -5,13 +5,13 @@ pyro.load_data(table=df) pyro.execute(error=0.0) print(f'[{", ".join(map(str, pyro.get_fds()))}]') -# [[0 1 2] -> 4, [0 2 3] -> 4, [0 1 3] -> 4, [1 2 3] -> 4] +# [[0 1 2] -> 4, [0 1 3] -> 4, [0 2 3] -> 4, [1 2 3] -> 4] pyro.execute(error=0.1) print(f'[{", ".join(map(str, pyro.get_fds()))}]') -# [[2] -> 0, [2] -> 3, [2] -> 1, [0] -> 2, [3] -> 0, [0] -> 3, [0] -> 1, [1] -> 3, [1] -> 0, [3] -> 2, [3] -> 1, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +# [[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] pyro.execute(error=0.2) print(f'[{", ".join(map(str, pyro.get_fds()))}]') -# [[2] -> 0, [0] -> 2, [3] -> 2, [1] -> 2, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4, [3] -> 0, [1] -> 0, [2] -> 3, [2] -> 1, [0] -> 3, [0] -> 1, [1] -> 3, [3] -> 1] +# [[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] pyro.execute(error=0.3) print(f'[{", ".join(map(str, pyro.get_fds()))}]') -# [[2] -> 1, [0] -> 2, [2] -> 0, [2] -> 3, [0] -> 1, [3] -> 2, [3] -> 1, [1] -> 2, [3] -> 0, [0] -> 3, [4] -> 1, [1] -> 0, [1] -> 3, [4] -> 2, [4] -> 3, [2] -> 4, [3] -> 4, [0] -> 4, [1] -> 4] +# [[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4, [4] -> 1, [4] -> 2, [4] -> 3] diff --git a/examples/comparison_pfd_vs_afd.py b/examples/comparison_pfd_vs_afd.py index 07cfb7657..fa0ba820d 100644 --- a/examples/comparison_pfd_vs_afd.py +++ b/examples/comparison_pfd_vs_afd.py @@ -1,4 +1,5 @@ import desbordante +from ordered_set import OrderedSet 
TABLE = 'examples/datasets/glitchy_sensor.csv' ERROR = 0.18 @@ -6,7 +7,7 @@ def stringify(fds): - return set(map(str, fds)) + return OrderedSet(map(str, fds)) def get_afds(): @@ -23,8 +24,8 @@ def get_pfds(): return algo.get_fds() -pfds = set(get_pfds()) -afds = set(get_afds()) +pfds = OrderedSet(get_pfds()) +afds = OrderedSet(get_afds()) print("pFDs \ AFDs =", stringify(pfds - afds)) print("AFDs \ pFDs =", stringify(afds - pfds)) diff --git a/examples/dedupe.py b/examples/dedupe.py index fab947f34..129e0ff7b 100644 --- a/examples/dedupe.py +++ b/examples/dedupe.py @@ -93,6 +93,7 @@ def merge_handler(df: pandas.DataFrame, new_rows, remaining_rows, used_rows): for col_name, values in zip(df.columns, zip(*df.iloc[list(used_rows)].itertuples(index=False))): distinct_values = list(set(values)) + distinct_values.sort() index = 0 if len(distinct_values) == 1 else choose_index(col_name, distinct_values) new_row.append(distinct_values[index]) remaining_rows -= used_rows diff --git a/examples/mining_set_od_2.py b/examples/mining_set_od_2.py index 83fb76edd..78b422b7f 100644 --- a/examples/mining_set_od_2.py +++ b/examples/mining_set_od_2.py @@ -94,7 +94,7 @@ def print_simple_ods_with_comments(simple_ods, table): print('be traced.') percent_values = list(table['percent']) - percent_classes = set([f'class [{i}] with {percent_values.count(i)} element{"" if percent_values.count(i) == 1 else "s"}' + percent_classes = list([f'class [{i}] with {percent_values.count(i)} element{"" if percent_values.count(i) == 1 else "s"}' for i in percent_values]) print() diff --git a/examples/testing/inputs/dedupe_input.txt b/examples/testing/inputs/dedupe_input.txt new file mode 100644 index 000000000..9c59a9d78 --- /dev/null +++ b/examples/testing/inputs/dedupe_input.txt @@ -0,0 +1,13 @@ +0 +1 2 3 4 5 6 +4 +merge 7 8 9 +0 +0 +1 +keepall +keepall +merge 52 53 +0 +1 +keepall diff --git a/examples/testing/outputs/afd_multiple_error_thresholds_output.txt 
b/examples/testing/outputs/afd_multiple_error_thresholds_output.txt new file mode 100644 index 000000000..5332e9613 --- /dev/null +++ b/examples/testing/outputs/afd_multiple_error_thresholds_output.txt @@ -0,0 +1,4 @@ +[[0 1 2] -> 4, [0 1 3] -> 4, [0 2 3] -> 4, [1 2 3] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4] +[[0] -> 1, [0] -> 2, [0] -> 3, [0] -> 4, [1] -> 0, [1] -> 2, [1] -> 3, [1] -> 4, [2] -> 0, [2] -> 1, [2] -> 3, [2] -> 4, [3] -> 0, [3] -> 1, [3] -> 2, [3] -> 4, [4] -> 1, [4] -> 2, [4] -> 3] diff --git a/examples/testing/outputs/algebraic_constraints_output.txt b/examples/testing/outputs/algebraic_constraints_output.txt new file mode 100644 index 000000000..845674ac0 --- /dev/null +++ b/examples/testing/outputs/algebraic_constraints_output.txt @@ -0,0 +1,19 @@ +Discovered ranges for (Delivery date - Dispatch date) are: +[(2.0, 7.0), (15.0, 22.0)] + +Rows in which the result of the chosen operation (-) is outside of discovered ranges: +id: 7 +Dispatch date: 1 +Delivery date: 30 +Difference: 29 + +id: 26 +Dispatch date: 7 +Delivery date: 18 +Difference: 11 + +id: 30 +Dispatch date: 11 +Delivery date: 22 +Difference: 11 + diff --git a/examples/testing/outputs/anomaly_detection_output.txt b/examples/testing/outputs/anomaly_detection_output.txt new file mode 100644 index 000000000..5a1e92be0 --- /dev/null +++ b/examples/testing/outputs/anomaly_detection_output.txt @@ -0,0 +1,44 @@ +FDs found for dataset 1: +[item_id] -> item_weight +[item_weight] -> item_id +[record_id] -> cargo_id +[record_id] -> item_id +[record_id] -> item_weight +[record_id] -> timestamp +[timestamp] -> cargo_id +[timestamp] -> item_id +[timestamp] -> item_weight +[timestamp] -> record_id +FDs found for 
dataset 2: +[item_id] -> item_weight +[item_weight] -> item_id +[record_id] -> cargo_id +[record_id] -> item_id +[record_id] -> item_weight +[record_id] -> timestamp +[timestamp] -> cargo_id +[timestamp] -> item_id +[timestamp] -> item_weight +[timestamp] -> record_id +FDs found for dataset 3: +[item_weight] -> item_id +[record_id] -> cargo_id +[record_id] -> item_id +[record_id] -> item_weight +[record_id] -> timestamp +[timestamp] -> cargo_id +[timestamp] -> item_id +[timestamp] -> item_weight +[timestamp] -> record_id +AFDs found for dataset 3: +[item_id cargo_id] -> item_weight +[item_weight] -> item_id +[record_id] -> cargo_id +[record_id] -> item_id +[record_id] -> item_weight +[record_id] -> timestamp +[timestamp] -> cargo_id +[timestamp] -> item_id +[timestamp] -> item_weight +[timestamp] -> record_id +MFD holds. diff --git a/examples/testing/outputs/comparison_pfd_vs_afd_output.txt b/examples/testing/outputs/comparison_pfd_vs_afd_output.txt new file mode 100644 index 000000000..9447c66c6 --- /dev/null +++ b/examples/testing/outputs/comparison_pfd_vs_afd_output.txt @@ -0,0 +1,7 @@ +pFDs \ AFDs = OrderedSet(['[DeviceId] -> Data']) +AFDs \ pFDs = OrderedSet() +AFDs ∩ pFDs = OrderedSet(['[Id] -> DeviceId', '[Id] -> Data', '[Data] -> Id', '[Data] -> DeviceId']) +1 - PerValue([DeviceId] -> Data) = 0.1714285714 +e([DeviceId] -> Data) = 0.23076923076923078 +In case of PerValue error measure, violations on data from the single "glitchy" +sensor device among many do not prevent dependecy from being found diff --git a/examples/testing/outputs/data_stats_output.txt b/examples/testing/outputs/data_stats_output.txt new file mode 100644 index 000000000..956b33b78 --- /dev/null +++ b/examples/testing/outputs/data_stats_output.txt @@ -0,0 +1,152 @@ +Columns with null: [] +Columns with all unique values: [0, 1] +Number of columns: 6 + +Column num: 0 +Min: 0008f14d-e2a7-4582-bf5e-89ce32b55606 +Max: fff1cd7a-04f9-486c-97de-d5d2c6ddb3cb +Distinct: 945 + +Column num: 1 +Min: 
Anthony Campbell +Max: William Taylor +Distinct: 945 + +Column num: 2 +Min: Addyson Aaliyah +Max: Shena Desiree +Distinct: 6 + +Column num: 3 +Min: MonsterWorq +Max: Yogatacular +Distinct: 5 + +Column num: 4 +Avg: 932.258201058201 +Sum of squares: 894298474 +Median: 945.0 +Min: 465 +Max: 2036 +Distinct: 28 +Corrected std: 278.07204551856535 + +Column num: 5 +Min: Client Solution Analyst +Max: Workshop Technician +Distinct: 15 + +Column num = 0 +num_chars = 34020 +num_uppercase_chars = 0 +type = String +isCategorical = 0 +num_lowercase_chars = 11108 +count = 945 +quantile50 = 81aabb56-808c-48a1-b2a3-5d3f2e1a752f +num_digit_chars = 19132 +distinct = 945 +avg_chars = 36.000000 +min = 0008f14d-e2a7-4582-bf5e-89ce32b55606 +quantile25 = 4307ef5b-2e00-4316-b04c-debff4edc5c4 +max = fff1cd7a-04f9-486c-97de-d5d2c6ddb3cb +quantile75 = c8539dda-ec0e-4c67-a2f4-2d201bb82171 +num_non_letter_chars = 22912 +vocab = -0123456789abcdef + +Column num = 1 +num_chars = 12261 +num_uppercase_chars = 1890 +type = String +isCategorical = 0 +num_lowercase_chars = 9426 +count = 945 +quantile50 = Kenneth King +num_digit_chars = 0 +distinct = 945 +avg_chars = 12.974603 +min = Anthony Campbell +quantile25 = Donna White +max = William Taylor +quantile75 = Patricia Gonzalez +num_non_letter_chars = 945 +vocab = ABCDEGHJKLMNPRSTWYabcdefghiklmnoprstuvwyz + +Column num = 2 +num_chars = 11843 +num_uppercase_chars = 1890 +type = String +isCategorical = 1 +num_lowercase_chars = 9008 +count = 945 +quantile50 = Galen Calla +num_digit_chars = 0 +distinct = 6 +avg_chars = 12.532275 +min = Addyson Aaliyah +quantile25 = Carrie Silvia +max = Shena Desiree +quantile75 = Paul Jeffry +num_non_letter_chars = 945 +vocab = ACDGJPSadefhilnorsuvy + +Column num = 3 +num_chars = 10452 +num_uppercase_chars = 1300 +type = String +isCategorical = 1 +num_lowercase_chars = 9152 +count = 945 +quantile50 = Talkspiration +num_digit_chars = 0 +distinct = 5 +avg_chars = 11.060317 +min = MonsterWorq +quantile25 = SpeakerAce +max = 
Yogatacular +quantile75 = Verbalthon +num_non_letter_chars = 0 +vocab = AMSTVWYabceghiklnopqrstu + +Column num = 4 +median_ad = 100.000000 +geometric_mean = 893.289725 +median = 945.000000 +sum_of_squares = 894298474 +num_negatives = 0 +quantile75 = 1020 +type = Int +mean_ad = 186.978103 +isCategorical = 0 +kurtosis = 2.859101 +count = 945 +quantile50 = 945 +num_zeros = 0 +avg = 932.258201 +distinct = 28 +STD = 278.072046 +skewness = 1.132442 +min = 465 +quantile25 = 800 +max = 2036 +sum = 880984 + +Column num = 5 +num_chars = 17603 +num_uppercase_chars = 2226 +type = String +isCategorical = 0 +num_lowercase_chars = 14152 +count = 945 +quantile50 = Physiotherapist +num_digit_chars = 0 +distinct = 15 +avg_chars = 18.627513 +min = Client Solution Analyst +quantile25 = JavaScript Developer +max = Workshop Technician +quantile75 = Service Technician +num_non_letter_chars = 1225 +vocab = -ACDEFJLMOPRSTWacdeghijklmnoprstuvy + + diff --git a/examples/testing/outputs/dedupe_output.txt b/examples/testing/outputs/dedupe_output.txt new file mode 100644 index 000000000..9c6eef4dc --- /dev/null +++ b/examples/testing/outputs/dedupe_output.txt @@ -0,0 +1,92 @@ +Deduplication parameters: +ALGORITHM='Pyro' +ERROR=0.00100 +DATASET_PATH='examples/datasets/duplicates.csv' +SEPARATOR=',' +INITIAL_WINDOW_SIZE=4 + +Dataset sample: + id name address city email phone country +0 5996 Kaede Sue 66 Pirus Kaede.Sue4422@virtex.rum 39 EU +1 36 Licia Wolf 35 Pilington Licia.Wolf1260@cmail.com 35 CM +2 17 Steve Doe 16 Syndye Steve.Doe272@muli.ry 16 GZ +3 62 Lisa Tarski 61 Syndye Lisa.Tarski3782@virtex.rum 61 JU +4 6 Mary Tarski 5 Lumdum Mary.Tarski30@ferser.edu 5 PR +.. ... ... ... ... ... ... ... 
+73 15 Ivan Dawn 14 Syndye Ivan.Dawn210@atomlema.ocg 14 FC +74 5993 Lisa Honjo 63 Roit Lisa.Honjo4032@virtex.rum 63 AI +75 59 Lisa Sue 58 Muxicu Lisa.Sue3422@cmail.com 58 AI +76 21 Steve Shiramine 20 Pilington Steve.Shiramine420@ferser.edu 20 GZ +77 44 Maxine Wolf 43 Muxicu Maxine.Wolf1892@atomlema.ocg 43 PR + +[78 rows x 7 columns] +Original records: 78 + +AFD info: +0: id -> ( name address city email phone country ) +2: address -> ( name ) +4: email -> ( name address phone country ) +5: phone -> ( name ) +LHS column index: RHS columns: +1: name +2: address +3: city +4: email +5: phone +6: country +RHS columns to use (indices): Equal columns to consider duplicates: id name address city email phone country +5 27 Björn Sue 26 Roit Björn.Sue702@cmail.com 26 CM +6 30 Björn Tarski 29 Lumdum Björn.Tarski870@ferser.edu 29 PR +7 5957 Björn Wolf 27 Björn.Wolf756@virtex.rum 27 AI +8 28 Björn Wolf 27 Björn.Wolf756@virtex.rum 27 AI +9 11886 Björn Wolf 28 Kustruma Björn.Wolf756@virtex.rum 27 AI +Command: Column: id. Which value to use? +0: 11886 +1: 28 +2: 5957 +index: Column: address. Which value to use? +0: 27 +1: 28 +index: Column: city. Which value to use? +0: +1: Kustruma +index: id name address city email phone country +5 27 Björn Sue 26 Roit Björn.Sue702@cmail.com 26 CM +6 30 Björn Tarski 29 Lumdum Björn.Tarski870@ferser.edu 29 PR +Command: id name address city email phone country +42 63 Lisa Dawn 62 Pilington Lisa.Dawn3906@atomlema.ocg 62 EU +43 57 Lisa Doe 56 Roit Lisa.Doe3192@virtex.rum 56 AI +44 64 Lisa Honjo 63 Pirus Lisa.Honjo4032@virtex.rum 63 AI +45 5993 Lisa Honjo 63 Roit Lisa.Honjo4032@virtex.rum 63 AI +Command: id name address city email phone country +50 60 Lisa Wolf 59 Syndye Lisa.Wolf3540@cmail.com 59 FC +51 7 Mary Dawn 6 Syndye Mary.Dawn42@atomlema.ocg 6 PR +52 5930 Mary Doe Lumdum Mary.Doe-5926@ferser.edu 0 +53 11859 Mary Doe Lumdum Mary.Doe-5926@ferser.edu 0 EU +54 1 Mary Doe Lumdum Mary.Doe0@muli.ry 4 EU +Command: Column: id. Which value to use? 
+0: 11859 +1: 5930 +index: Column: country. Which value to use? +0: +1: EU +index: id name address city email phone country +50 60 Lisa Wolf 59 Syndye Lisa.Wolf3540@cmail.com 59 FC +51 7 Mary Dawn 6 Syndye Mary.Dawn42@atomlema.ocg 6 PR +54 1 Mary Doe Lumdum Mary.Doe0@muli.ry 4 EU +Command: +Resulting records: 75. Duplicates found: 3 + id name address city email phone country +0 31 Björn Dawn 30 Muxicu Björn.Dawn930@atomlema.ocg 30 JU +1 25 Björn Doe 24 Pilington Björn.Doe600@muli.ry 24 FC +2 32 Björn Honjo 31 Kustruma Björn.Honjo992@virtex.rum 31 RI +3 29 Björn Shiramine 28 Syndye Björn.Shiramine812@virtex.rum 28 EU +4 26 Björn Smith 25 Pilington Björn.Smith650@virtex.rum 25 RI +.. .. ... ... ... ... ... ... +70 21 Steve Shiramine 20 Pilington Steve.Shiramine420@ferser.edu 20 GZ +71 20 Steve Wolf 19 Pilington Steve.Wolf380@muli.ry 19 RI +72 22 Steve Tarski 21 Pilington Steve.Tarski462@atomlema.ocg 21 PR +73 19 Steve Sue 18 Syndye Steve.Sue342@virtex.rum 18 AI +74 18 Steve Smith 17 Lumdum Steve.Smith306@cmail.com 17 EU + +[75 rows x 7 columns] diff --git a/examples/testing/outputs/mine_typos_output.txt b/examples/testing/outputs/mine_typos_output.txt new file mode 100644 index 000000000..ef25e0c11 --- /dev/null +++ b/examples/testing/outputs/mine_typos_output.txt @@ -0,0 +1,44 @@ +Starting typo discovery scenario with parameters: +RADIUS=3 +RATIO=0.1 +ERROR=0.005 +DATASET_PATH='examples/datasets/Workshop.csv' +EXACT_ALGORITHM='HyFD' +APPROXIMATE_ALGORITHM='Pyro' +HEADER=0 +SEPARATOR=',' + +Dataset sample: + id worker_name supervisor_surname workshop salary job_post +0 404f50cb-caf0-4974-97f9-9463434537e1 Jennifer Moore Galen Calla Yogatacular 980 Client Solution Analyst +1 b5e38281-9c09-49bf-91f5-c55397df4d43 Edward Lee Carrie Silvia MonsterWorq 905 Front-End Loader Operator +2 972b299d-2f27-4d6d-81d2-8effbc543bf1 Brian Lee Shena Desiree Talkspiration 700 Farm Assistant +3 3241fb48-5a15-4638-bd68-d915834a3f89 Kenneth Turner Paul Jeffry Verbalthon 980 Client Solution 
Analyst +4 9cbb9026-f157-4a01-aace-a42b05ab2a28 Betty Campbell Addyson Aaliyah SpeakerAce 800 Physiotherapist +.. ... ... ... ... ... ... +940 9cd700bc-b3d9-439d-afe9-945c2a20bc37 Richard Lopez Galen Calla Yogatacular 845 Senior Financial Planner +941 cc199ff4-453a-4ae5-9fbd-b45d72fa952a Helen Rodriguez Carrie Silvia MonsterWorq 465 Electrician +942 de650347-880a-42a2-88c9-4329f26fb912 Karen White Carrie Silvia MonsterWorq 510 JavaScript Developer +943 ae604e24-e040-4d50-b685-5b4897ab9ae9 Charles Smith Shena Desiree Talkspiration 975 Store Manager +944 d5cb954a-e942-47ae-9b62-b57f7a84c2db Jeff King Carrie Silvia MonsterWorq 465 Electrician + +[945 rows x 6 columns] + +Searching for almost holding FDs... + +Found! Almost holding FDs: +[supervisor_surname salary] -> job_post +[supervisor_surname job_post] -> salary +[workshop] -> supervisor_surname +[workshop salary] -> job_post +[workshop job_post] -> salary + +Selecting FD with index 2: + rows count workshop supervisor_surname + 198 Yogatacular Galen Calla + 1 Yogatacular Galen Calella + +Typo candidates and context: + id worker_name supervisor_surname workshop salary job_post +0 404f50cb-caf0-4974-97f9-9463434537e1 Jennifer Moore Galen Calla Yogatacular 980 Client Solution Analyst +7 ddba9118-ec89-472d-9f3f-bebd919f0e3a William Robinson Galen Calella Yogatacular 975 Store Manager diff --git a/examples/testing/outputs/mining_afd_output.txt b/examples/testing/outputs/mining_afd_output.txt new file mode 100644 index 000000000..191b21eaa --- /dev/null +++ b/examples/testing/outputs/mining_afd_output.txt @@ -0,0 +1,4 @@ +AFDs: +[Id] -> ProductName +[Id] -> Price +[ProductName] -> Price diff --git a/examples/testing/outputs/mining_cfd_output.txt b/examples/testing/outputs/mining_cfd_output.txt new file mode 100644 index 000000000..042a99843 --- /dev/null +++ b/examples/testing/outputs/mining_cfd_output.txt @@ -0,0 +1,124 @@ +options: +MINIMUM SUPPORT = 8 , MINIMUM CONFIDENCE = 0.7 , MAXIMUM LHS COUNT = 3 +displaying the 
first five (or fewer) discovered CFDs: + +CFD: +{(3, False)} -> (4, True) : + + Outlook Temperature Humidity Windy Play + 0 sunny hot high False False + 1 sunny hot high True False + 2 overcast hot high False True + 3 rain mild high False True + 4 rain cool normal False True + 5 rain cool normal True False + 6 overcast cool normal True True + 7 sunny mild high False False + 8 sunny cool normal False True + 9 rain mild normal False True + 10 sunny mild normal True True + 11 overcast mild high True True + 12 overcast hot normal False True + 13 rain mild high True False +lhs count: 1 +support: 8 +confidence: 6 / 8 = 0.7500 + + + +CFD: +{(2, _)} -> (4, _) : + + Outlook Temperature Humidity Windy Play + 0 sunny hot high False False + 1 sunny hot high True False + 2 overcast hot high False True + 3 rain mild high False True + 4 rain cool normal False True + 5 rain cool normal True False + 6 overcast cool normal True True + 7 sunny mild high False False + 8 sunny cool normal False True + 9 rain mild normal False True + 10 sunny mild normal True True + 11 overcast mild high True True + 12 overcast hot normal False True + 13 rain mild high True False +lhs count: 1 +support: 14 +confidence: 10 / 14 = 0.7143 + + + +CFD: +{(4, _)} -> (2, _) : + + Outlook Temperature Humidity Windy Play + 0 sunny hot high False False + 1 sunny hot high True False + 2 overcast hot high False True + 3 rain mild high False True + 4 rain cool normal False True + 5 rain cool normal True False + 6 overcast cool normal True True + 7 sunny mild high False False + 8 sunny cool normal False True + 9 rain mild normal False True + 10 sunny mild normal True True + 11 overcast mild high True True + 12 overcast hot normal False True + 13 rain mild high True False +lhs count: 1 +support: 14 +confidence: 10 / 14 = 0.7143 + + + +CFD: +{(3, _),(2, _)} -> (4, _) : + + Outlook Temperature Humidity Windy Play + 0 sunny hot high False False + 1 sunny hot high True False + 2 overcast hot high False True + 3 rain mild 
high False True + 4 rain cool normal False True + 5 rain cool normal True False + 6 overcast cool normal True True + 7 sunny mild high False False + 8 sunny cool normal False True + 9 rain mild normal False True + 10 sunny mild normal True True + 11 overcast mild high True True + 12 overcast hot normal False True + 13 rain mild high True False +lhs count: 2 +support: 14 +confidence: 10 / 14 = 0.7143 + + + +CFD: +{(2, _),(3, False)} -> (4, _) : + + Outlook Temperature Humidity Windy Play + 0 sunny hot high False False + 1 sunny hot high True False + 2 overcast hot high False True + 3 rain mild high False True + 4 rain cool normal False True + 5 rain cool normal True False + 6 overcast cool normal True True + 7 sunny mild high False False + 8 sunny cool normal False True + 9 rain mild normal False True + 10 sunny mild normal True True + 11 overcast mild high True True + 12 overcast hot normal False True + 13 rain mild high True False +lhs count: 2 +support: 8 +confidence: 6 / 8 = 0.7500 + + + diff --git a/examples/testing/outputs/mining_fd_output.txt b/examples/testing/outputs/mining_fd_output.txt new file mode 100644 index 000000000..a0abfc82f --- /dev/null +++ b/examples/testing/outputs/mining_fd_output.txt @@ -0,0 +1,8 @@ +FDs: +[Professor] -> Course +[Course Classroom] -> Professor +[Course Semester] -> Classroom +[Course Semester] -> Professor +[Classroom Semester] -> Course +[Classroom Semester] -> Professor +[Professor Semester] -> Classroom diff --git a/examples/testing/outputs/mining_ind_output.txt b/examples/testing/outputs/mining_ind_output.txt new file mode 100644 index 000000000..86f2687d1 --- /dev/null +++ b/examples/testing/outputs/mining_ind_output.txt @@ -0,0 +1,34 @@ +Found inclusion dependencies (-> means "is included in"): + +(course.csv, [Department name]) -> (department.csv, [Department name]) +(instructor.csv, [Department name]) -> (department.csv, [Department name]) +(student.csv, [Department name]) -> (department.csv, [Department name]) 
+(teaches.csv, [Instructor ID]) -> (instructor.csv, [ID]) +(teaches.csv, [Course ID]) -> (course.csv, [Course ID]) + +Tables for first IND: +course.csv: + +Course ID Title Department name +--------------------------------------------------------------------- +IT-1 Computer Science Institute of Information Technology +MM-3 Algebra Mathematics and Mechanics Faculty +H-1 History Institute of History +FL-2 English Faculty of Foreign Languages +IT-2 Programming Institute of Information Technology +S-5 Philosophy Faculty of Sociology +P-2 Physics Faculty of Physics +C-8 Chemistry Institute of Chemistry + +department.csv: + +Department name Building +----------------------------------------------------------- +Institute of Information Technology 5 Academic av. +Mathematics and Mechanics Faculty 3 Academic av. +Institute of History 29A University st. +Faculty of Foreign Languages 10 Science sq. +Faculty of Sociology 29C University st. +Faculty of Physics 10 Academic av. +Institute of Chemistry 11 Academic av. +Graduate School of Managemment 49 Science sq. diff --git a/examples/testing/outputs/mining_list_od_output.txt b/examples/testing/outputs/mining_list_od_output.txt new file mode 100644 index 000000000..7177b7767 --- /dev/null +++ b/examples/testing/outputs/mining_list_od_output.txt @@ -0,0 +1,49 @@ + ++----+----------+-----------------+--------+ +| | weight | shipping cost | days | +|----+----------+-----------------+--------| +| 0 | 5 | 14 | 2 | +| 1 | 10 | 22 | 6 | +| 2 | 3 | 10 | 4 | +| 3 | 10 | 25 | 7 | +| 4 | 5 | 14 | 2 | +| 5 | 20 | 40 | 8 | ++----+----------+-----------------+--------+ + +Resulting dependencies for this table are: +['weight', 'days'] -> ['shipping cost'] +['shipping cost'] -> ['weight', 'days'] +['weight'] -> ['shipping cost'] + +Depenency [weight] -> [shipping cost] means that ordering table by weight +will also order table by shipping cost automatically. 
Let's order by weight: + ++----+----------+-----------------+--------+ +| | weight | shipping cost | days | +|----+----------+-----------------+--------| +| 2 | 3 | 10 | 4 | +| 0 | 5 | 14 | 2 | +| 4 | 5 | 14 | 2 | +| 1 | 10 | 22 | 6 | +| 3 | 10 | 25 | 7 | +| 5 | 20 | 40 | 8 | ++----+----------+-----------------+--------+ + +We can see that shipping cost is sorted too. And dependency seems reasonable: +the more the package weights, the more expensive it will be to send it. + +Order dependencies are called lexicographical, because ordering for multiple +columns is lexicographical. For example [shipping cost] -> [weight, days] implies +that ordering by shipping cost will also lexicographically order [weight, days]: + ++----+----------+-----------------+--------+ +| | weight | shipping cost | days | +|----+----------+-----------------+--------| +| 2 | 3 | 10 | 4 | +| 0 | 5 | 14 | 2 | +| 4 | 5 | 14 | 2 | +| 1 | 10 | 22 | 6 | +| 3 | 10 | 25 | 7 | +| 5 | 20 | 40 | 8 | ++----+----------+-----------------+--------+ + diff --git a/examples/testing/outputs/mining_pfd_output.txt b/examples/testing/outputs/mining_pfd_output.txt new file mode 100644 index 000000000..1e91b04be --- /dev/null +++ b/examples/testing/outputs/mining_pfd_output.txt @@ -0,0 +1,3 @@ +per_value pFDs: +[Y] -> X +per_tuple pFDs: diff --git a/examples/testing/outputs/mining_set_od_1_output.txt b/examples/testing/outputs/mining_set_od_1_output.txt new file mode 100644 index 000000000..983052ce7 --- /dev/null +++ b/examples/testing/outputs/mining_set_od_1_output.txt @@ -0,0 +1,108 @@ ++----+--------+------------------+--------------+ +| | year | employee_grade | avg_salary | +|----+--------+------------------+--------------| +| 0 | 2020 | 24 | 1000 | +| 1 | 2020 | 40 | 7000 | +| 2 | 2020 | 32 | 5000 | +| 3 | 2020 | 29 | 3000 | +| 4 | 2020 | 49 | 10000 | +| 5 | 2021 | 50 | 15000 | +| 6 | 2021 | 25 | 1500 | +| 7 | 2021 | 30 | 6000 | ++----+--------+------------------+--------------+ + +Attribute symbols: +year 
-- 1 +employee_grade -- 2 +avg_salary -- 3 + +descending ods: 0 + +ascending ods: 2 +{1} : 2<= ~ 3<= +{1} : 3<= ~ 2<= + +Dependency "{1} : 2<= ~ 3<=" means that ordering the table +inside each equivalence class from "year" by attribute "avg_salary" +automatically entails ordering by attribute "employee_grade". + +We have 2 equivalence classes in "year": [2020] and [2021]. +Let's split the table into two tables based on these classes. + +Part 1: this part of table corresponds to class [2020] ++----+--------+------------------+--------------+ +| | year | employee_grade | avg_salary | +|----+--------+------------------+--------------| +| 0 | 2020 | 24 | 1000 | +| 1 | 2020 | 40 | 7000 | +| 2 | 2020 | 32 | 5000 | +| 3 | 2020 | 29 | 3000 | +| 4 | 2020 | 49 | 10000 | ++----+--------+------------------+--------------+ + +Let's sort it by attribute "avg_salary". + +Sorted part 1: ++----+--------+------------------+--------------+ +| | year | employee_grade | avg_salary | +|----+--------+------------------+--------------| +| 0 | 2020 | 24 | 1000 | +| 3 | 2020 | 29 | 3000 | +| 2 | 2020 | 32 | 5000 | +| 1 | 2020 | 40 | 7000 | +| 4 | 2020 | 49 | 10000 | ++----+--------+------------------+--------------+ + +We can see that this sort entails automatic ordering by +attribute "employee_grade". + +Part 2: this part of table corresponds to class [2021] ++----+--------+------------------+--------------+ +| | year | employee_grade | avg_salary | +|----+--------+------------------+--------------| +| 5 | 2021 | 50 | 15000 | +| 6 | 2021 | 25 | 1500 | +| 7 | 2021 | 30 | 6000 | ++----+--------+------------------+--------------+ + +Let's sort it by attribute "avg_salary". 
+ +Sorted part 2: ++----+--------+------------------+--------------+ +| | year | employee_grade | avg_salary | +|----+--------+------------------+--------------| +| 6 | 2021 | 25 | 1500 | +| 7 | 2021 | 30 | 6000 | +| 5 | 2021 | 50 | 15000 | ++----+--------+------------------+--------------+ + +We can see that this sort entails automatic ordering by +attribute "employee_grade" too. + +Dependency "{1} : 3<= ~ 2<=" is similar to the first and means that +ordering the table inside each equivalence class from "year" by +attribute "employee_grade" automatically entails ordering by +attribute "avg_salary". This can be seen in the tables above. + +In other words, these dependencies indicate that the ordering of +average salary entails an automatic ordering of the employee grade +and vice versa. + +simple ods: 4 +{2} : [] -> 1<= +{3} : [] -> 1<= +{3} : [] -> 2<= +{2} : [] -> 3<= + +These dependencies mean that inside each equivalence class from +an attribute from their context the constancy of the attribute +from the right side of the dependency can be traced. + +For example, let's look at "{2} : [] -> 1<=". The context of this +dependency is attribute "employee_grade". We have 8 equivalence classes +in "employee_grade": [24], [40], [32], [29], [49], [50], [25], [30]. +Since all the elements of attribute "employee_grade" are different, +each of these classes contains only one element, so constancy within +each class occurs automatically. + +To better understand such dependencies, refer to the second example. 
diff --git a/examples/testing/outputs/mining_set_od_2_output.txt b/examples/testing/outputs/mining_set_od_2_output.txt new file mode 100644 index 000000000..ac9f174a9 --- /dev/null +++ b/examples/testing/outputs/mining_set_od_2_output.txt @@ -0,0 +1,134 @@ ++----+--------+------------+-----------+ +| | year | position | percent | +|----+--------+------------+-----------| +| 0 | 2020 | director | 10% | +| 1 | 2020 | other | 50% | +| 2 | 2020 | manager | 40% | +| 3 | 2021 | manager | 35% | +| 4 | 2021 | other | 55% | +| 5 | 2021 | director | 10% | ++----+--------+------------+-----------+ + +Attribute symbols: +year -- 1 +position -- 2 +percent -- 3 + +descending ods: 0 + +ascending ods: 2 +{} : 3<= ~ 2<= +{} : 2<= ~ 3<= + +Dependency "{} : 3<= ~ 2<=" means that ordering the table by attribute +"percent" automatically entails ordering by attribute "position". +Moreover, this is observed regardless of other attributes, since the +dependency context is empty. + +Let's sort it by attribute "percent". + +Sorted table: ++----+--------+------------+-----------+ +| | year | position | percent | +|----+--------+------------+-----------| +| 0 | 2020 | director | 10% | +| 5 | 2021 | director | 10% | +| 3 | 2021 | manager | 35% | +| 2 | 2020 | manager | 40% | +| 1 | 2020 | other | 50% | +| 4 | 2021 | other | 55% | ++----+--------+------------+-----------+ + +We can see that this sort entails automatic ordering by attribute +"position". + +Dependency "{} : 2<= ~ 3<=" is similar to the first and means that +ordering the table by attribute "position" automatically entails +ordering by attribute "percent". This can be seen in the table above. + +In other words, these dependencies indicate that the ordering of +percents entails an automatic ordering of the positions and vice +versa. + +simple ods: 2 +{3} : [] -> 2<= +{1,2} : [] -> 3<= + +Dependency "{3} : [] -> 2<=" means that inside each equivalence +class from "percent" the constancy of the attribute "position" can +be traced. 
+ +We have 5 equivalence classes in "percent": +class [10%] with 2 elements +class [50%] with 1 element +class [40%] with 1 element +class [35%] with 1 element +class [55%] with 1 element +class [10%] with 2 elements + +This table shows the constancy of values from attribute "position" +within each equivalence class from "percent". For clarity, lines +containing different equivalence classes are colored differently. + ++--------+------------+-----------+ +| year | position | percent | +|--------+------------+-----------| +| 2020 | director | 10% | +| 2020 | other | 50% | +| 2020 | manager | 40% | +| 2021 | manager | 35% | +| 2021 | other | 55% | +| 2021 | director | 10% | ++--------+------------+-----------+ + +Dependency "{1,2} : [] -> 3<=" contains 2 attributes ("year" and +"position") in its context and means the following: in the context +of one year and one position the constancy of percents is observed. +That is, in those tuples in which the year and position are the same, +the same percent value is observed. + +The following table shows these observations. + ++--------+------------+-----------+ +| year | position | percent | +|--------+------------+-----------| +| 2020 | director | 10% | +| 2020 | other | 50% | +| 2020 | manager | 40% | +| 2021 | manager | 35% | +| 2021 | other | 55% | +| 2021 | director | 10% | ++--------+------------+-----------+ + +Consider the following two tables. In the first, dependency +"{1,2} : [] -> 3<=" continues to exist. But in the second one no +longer exists, since it is violated in third tuple, where the pair +(2020, director) corresponds to 20%. 
+ +Dependency "{1,2} : [] -> 3<=" continues to exist: ++--------+------------+-----------+ +| year | position | percent | +|--------+------------+-----------| +| 2020 | director | 10% | +| 2020 | director | 10% | +| 2020 | director | 10% | +| 2020 | other | 50% | +| 2020 | manager | 40% | +| 2021 | manager | 35% | +| 2021 | other | 55% | +| 2021 | director | 10% | ++--------+------------+-----------+ + +Dependency "{1,2} : [] -> 3<=" no longer exists: ++--------+------------+-----------+ +| year | position | percent | +|--------+------------+-----------| +| 2020 | director | 10% | +| 2020 | director | 10% | +| 2020 | director | 20% | +| 2020 | other | 50% | +| 2020 | manager | 40% | +| 2021 | manager | 35% | +| 2021 | other | 55% | +| 2021 | director | 10% | ++--------+------------+-----------+ diff --git a/examples/testing/outputs/verifying_aucc_output.txt b/examples/testing/outputs/verifying_aucc_output.txt new file mode 100644 index 000000000..2ba32f13d --- /dev/null +++ b/examples/testing/outputs/verifying_aucc_output.txt @@ -0,0 +1,64 @@ +Dataset AUCC_example.csv: + ID name card_num card_active +0 1 Alex 665 True +1 2 Liam 667 True +2 3 Ezra 553 True +3 4 Alex 665 False +4 5 Kian 667 False +5 6 Otis 111 True +-------------------------------------------------------------------------------- +Checking whether (ID) UCC holds +-------------------------------------------------------------------------------- + +UCC holds, showing stats for AUCC is useless + +-------------------------------------------------------------------------------- +Checking whether (name) UCC holds +It should not hold, there are 2 persons, named Alex +-------------------------------------------------------------------------------- + +UCC does not hold +But AUCC with error = 0.0667 holds + +Also: +Total number of rows violating UCC: 2 +Number of clusters violating UCC: 1 +Clusters violating UCC: +found 1 clusters violating UCC: + +First violating cluster: + ID name card_num card_active +0 1 
Alex 665 True +3 4 Alex 665 False + +-------------------------------------------------------------------------------- +Checking whether (card_num) UCC holds +It should not hold, there are 2 identical card numbers +-------------------------------------------------------------------------------- + +UCC does not hold +But AUCC with error = 0.1333 holds + +Also: +Total number of rows violating UCC: 4 +Number of clusters violating UCC: 2 +Clusters violating UCC: +found 2 clusters violating UCC: + +First violating cluster: + ID name card_num card_active +0 1 Alex 665 True +3 4 Alex 665 False +Second violating cluster: + ID name card_num card_active +1 2 Liam 667 True +4 5 Kian 667 False + +-------------------------------------------------------------------------------- +Checking whether (card_num, card_active) UCC holds +It should hold, cards with identical numbers are not active simultaneously +-------------------------------------------------------------------------------- + +UCC holds, showing stats for AUCC is useless + +-------------------------------------------------------------------------------- diff --git a/examples/testing/outputs/verifying_fd_afd_output.txt b/examples/testing/outputs/verifying_fd_afd_output.txt new file mode 100644 index 000000000..9c233efcb --- /dev/null +++ b/examples/testing/outputs/verifying_fd_afd_output.txt @@ -0,0 +1,90 @@ +First, let's look at the duplicates_short.csv table and try to verify the functional dependency in it. + + id name ... phone country +0 26 Björn Smith ... 25 RI +1 11859 Mary Doe ... 0 EU +2 1 Mary Doe ... 4 EU +3 56 Emily Honjo ... 55 GZ +4 30 Björn Tarski ... 29 PR +5 17788 Mary Doe ... 0 EU +6 5930 Mary Doe ... 0 EU +7 58 Lisa Smith ... 57 CM +8 29 Björn Shiramine ... 28 EU +9 28 Björn Wolf ... 27 AI +10 60 Lisa Wolf ... 59 FC +11 11886 Björn Wolf ... 27 AI +12 5970 Maxine Doe ... 40 CM +13 46 Maxine Tarski ... 45 EU +14 5957 Björn Wolf ... 
27 AI + +[15 rows x 7 columns] + +Checking whether [id] -> [name] FD holds + FD holds +Checking whether [name] -> [credit_score] FD holds + FD does not hold +Number of clusters violating FD: 2 + #1 cluster: +1: Mary Doe -> 0.0 +2: Mary Doe -> 0.0 +5: Mary Doe -> 0.0 +6: Mary Doe -> nan +Most frequent rhs value proportion: 0.75 +Num distinct rhs values: 2 + + #2 cluster: +9: Björn Wolf -> 27.0 +11: Björn Wolf -> 28.0 +14: Björn Wolf -> 27.0 +Most frequent rhs value proportion: 0.6666666666666666 +Num distinct rhs values: 2 + +We learned that in this case the specified FD does not hold and there are two clusters of rows that contain values that prevent our FD from holding. A cluster (with respect to a fixed FD) is a collection of rows that share the same left-hand side part but differ on the right-hand side one. +Let's take a closer look at them. + +In the first cluster, three values are "0" and a single one is "nan". This suggests that this single entry with the "nan" value is a result of a mistake by someone who is not familiar with the table population policy. Therefore, it should probably be changed to "0". + +Now let's take a look at the second cluster. There are two entries: "27" and "28". In this case, it is probably a typo, since buttons 7 and 8 are located close to each other on the keyboard. + +Having analyzed these clusters, we can conclude that our FD does not hold due to typos in the data. Therefore, by eliminating them, we can get this FD to hold (and make our dataset error-free). 
+ +-------------------------------------------------------------------------------- +Now let's look at the DnD.csv to consider the AFD + + Creature Strength HaveMagic +0 Ogre 9 False +1 Ogre 6 False +2 Elf 6 True +3 Elf 6 True +4 Elf 1 True +5 Dwarf 9 False +6 Dwarf 6 False + +Checking whether [Creature] -> [Strength] AFD holds (error threshold = 0.5) + AFD with this error threshold holds +Checking whether [Creature] -> [Strength] AFD holds (error threshold = 0.1) + AFD with this error threshold does not hold +But the same AFD with error threshold = 0.19047619047619047 holds + +Similarly to the FD verification primitive, the AFD one can provide a user with clusters: + +Number of clusters violating FD: 3 + #1 cluster: +2: Elf -> 6 +3: Elf -> 6 +4: Elf -> 1 +Most frequent rhs value proportion: 0.6666666666666666 +Num distinct rhs values: 2 + + #2 cluster: +0: Ogre -> 9 +1: Ogre -> 6 +Most frequent rhs value proportion: 0.5 +Num distinct rhs values: 2 + + #3 cluster: +5: Dwarf -> 9 +6: Dwarf -> 6 +Most frequent rhs value proportion: 0.5 +Num distinct rhs values: 2 + diff --git a/examples/testing/outputs/verifying_mfd_output.txt b/examples/testing/outputs/verifying_mfd_output.txt new file mode 100644 index 000000000..b7fa244a5 --- /dev/null +++ b/examples/testing/outputs/verifying_mfd_output.txt @@ -0,0 +1 @@ +MFD holds diff --git a/examples/testing/outputs/verifying_ucc_output.txt b/examples/testing/outputs/verifying_ucc_output.txt new file mode 100644 index 000000000..25d5ef93b --- /dev/null +++ b/examples/testing/outputs/verifying_ucc_output.txt @@ -0,0 +1,18 @@ +Checking whether (First Name) UCC holds +UCC does not hold +Total number of rows violating UCC: 2 +Number of clusters violating UCC: 1 +Clusters violating UCC: +[4, 5] + +Checking whether (First Name, Last Name) UCC holds +UCC holds + +Checking whether (Born Town, Born Country) UCC holds +UCC does not hold +Total number of rows violating UCC: 5 +Number of clusters violating UCC: 2 +Clusters violating UCC: 
+[2, 3, 4] +[6, 7] + diff --git a/examples/testing/test_examples.sh b/examples/testing/test_examples.sh new file mode 100644 index 000000000..817c0298f --- /dev/null +++ b/examples/testing/test_examples.sh @@ -0,0 +1,40 @@ +#!/bin/bash +echo "Testing afd_multiple_error_thresholds" && python3 examples/afd_multiple_error_thresholds.py | diff - examples/testing/outputs/afd_multiple_error_thresholds_output.txt + +echo "Testing algebraic_constraints" && python3 examples/algebraic_constraints.py | diff - examples/testing/outputs/algebraic_constraints_output.txt + +echo "Testing anomaly_detection" && python3 examples/anomaly_detection.py | diff - examples/testing/outputs/anomaly_detection_output.txt + +echo "Testing comparison_pfd_vs_afd" && python3 examples/comparison_pfd_vs_afd.py | diff - examples/testing/outputs/comparison_pfd_vs_afd_output.txt + +echo "Testing data_stats" && python3 examples/data_stats.py | diff - examples/testing/outputs/data_stats_output.txt + +#command sed "s,\x1B\[[0-9;]*[a-zA-Z],,g" removes ANSI color codes from output +echo "Testing mine_typos" && python3 examples/mine_typos.py | sed "s,\x1B\[[0-9;]*[a-zA-Z],,g" | diff - examples/testing/outputs/mine_typos_output.txt + +echo "Testing mining_afd" && python3 examples/mining_afd.py | diff - examples/testing/outputs/mining_afd_output.txt + +echo "Testing mining_fd" && python3 examples/mining_fd.py | diff - examples/testing/outputs/mining_fd_output.txt + +echo "Testing mining_ind" && python3 examples/mining_ind.py | diff --color=never - examples/testing/outputs/mining_ind_output.txt + +echo "Testing mining_list_od" && python3 examples/mining_list_od.py | diff - examples/testing/outputs/mining_list_od_output.txt + +echo "Testing mining_pfd" && python3 examples/mining_pfd.py | diff - examples/testing/outputs/mining_pfd_output.txt + +echo "Testing mining_set_od_1" && python3 examples/mining_set_od_1.py | diff - examples/testing/outputs/mining_set_od_1_output.txt + +echo "Testing mining_set_od_2" && 
python3 examples/mining_set_od_2.py | sed "s,\x1B\[[0-9;]*[a-zA-Z],,g" | diff --color=never - examples/testing/outputs/mining_set_od_2_output.txt
+
+echo "Testing verifying_aucc" && python3 examples/verifying_aucc.py | diff - examples/testing/outputs/verifying_aucc_output.txt
+
+echo "Testing verifying_fd_afd" && python3 examples/verifying_fd_afd.py | sed "s,\x1B\[[0-9;]*[a-zA-Z],,g" | diff --color=never - examples/testing/outputs/verifying_fd_afd_output.txt
+
+echo "Testing verifying_mfd" && python3 examples/verifying_mfd.py | diff - examples/testing/outputs/verifying_mfd_output.txt
+
+echo "Testing verifying_ucc" && python3 examples/verifying_ucc.py | diff - examples/testing/outputs/verifying_ucc_output.txt
+
+#!!!there are warnings in the current dedupe.py version
+echo "Testing dedupe" && python3 -W ignore examples/dedupe.py < examples/testing/inputs/dedupe_input.txt | diff - examples/testing/outputs/dedupe_output.txt
+
+echo "Testing mining_cfd" && python3 examples/mining_cfd.py | sed "s,\x1B\[[0-9;]*[a-zA-Z],,g" | diff - examples/testing/outputs/mining_cfd_output.txt
diff --git a/src/core/algorithms/fd/fd_algorithm.cpp b/src/core/algorithms/fd/fd_algorithm.cpp
index 420dbd4f0..2827253fd 100644
--- a/src/core/algorithms/fd/fd_algorithm.cpp
+++ b/src/core/algorithms/fd/fd_algorithm.cpp
@@ -27,6 +27,20 @@ void FDAlgorithm::ResetState() {
     ResetStateFd();
 }
 
+std::list<FD>& FDAlgorithm::SortedFdList() {
+    fd_collection_.AsList().sort([](const FD& l_fd, const FD& r_fd) {
+        if (l_fd.GetLhs().GetArity() != r_fd.GetLhs().GetArity()) {
+            return l_fd.GetLhs().GetArity() < r_fd.GetLhs().GetArity();
+        }
+        if (l_fd.GetLhs() != r_fd.GetLhs()) {
+            return l_fd.GetLhs() < r_fd.GetLhs();
+        }
+        return l_fd.GetRhsIndex() < r_fd.GetRhsIndex();
+    });
+
+    return fd_collection_.AsList();
+}
+
 std::string FDAlgorithm::GetJsonFDs() const {
     return FDsToJson(FdList());
 }
diff --git a/src/core/algorithms/fd/fd_algorithm.h b/src/core/algorithms/fd/fd_algorithm.h
index f80d17ff3..965505c72
100644
--- a/src/core/algorithms/fd/fd_algorithm.h
+++ b/src/core/algorithms/fd/fd_algorithm.h
@@ -66,6 +66,8 @@ class FDAlgorithm : public Algorithm {
         return fd_collection_.AsList();
     }
 
+    std::list<FD>& SortedFdList();
+
     /* возвращает набор ФЗ в виде JSON-а. По сути, это просто представление фиксированного формата
      * для сравнения результатов разных алгоритмов. JSON - на всякий случай, если потом, например,
      * понадобится загрузить список в питон и как-нибудь его поанализировать
diff --git a/src/core/model/table/vertical.cpp b/src/core/model/table/vertical.cpp
index 5817a0aba..697097071 100644
--- a/src/core/model/table/vertical.cpp
+++ b/src/core/model/table/vertical.cpp
@@ -143,11 +143,3 @@ std::vector<Vertical> Vertical::GetParents() const {
     }
     return parents;
 }
-
-bool Vertical::operator<(Vertical const& rhs) const {
-    assert(*schema_ == *rhs.schema_);
-    if (this->column_indices_ == rhs.column_indices_) return false;
-
-    boost::dynamic_bitset<> const& lr_xor = (this->column_indices_ ^ rhs.column_indices_);
-    return rhs.column_indices_.test(lr_xor.find_first());
-}
diff --git a/src/core/model/table/vertical.h b/src/core/model/table/vertical.h
index 672622b50..99cf675b2 100644
--- a/src/core/model/table/vertical.h
+++ b/src/core/model/table/vertical.h
@@ -42,7 +42,9 @@ class Vertical {
      * it treats bitsets little endian during comparison and this is not
      * suitable for this case, check out operator< for Columns.
*/ - bool operator<(Vertical const& rhs) const; + bool operator<(Vertical const& rhs) const { + return column_indices_ < rhs.column_indices_; + } bool operator==(Vertical const& other) const { return column_indices_ == other.column_indices_; diff --git a/src/python_bindings/fd/bind_fd.cpp b/src/python_bindings/fd/bind_fd.cpp index 609f7eb8d..78fa4b0cf 100644 --- a/src/python_bindings/fd/bind_fd.cpp +++ b/src/python_bindings/fd/bind_fd.cpp @@ -56,8 +56,7 @@ void BindFd(py::module_& main_module) { static constexpr auto kPFDTaneName = "PFDTane"; auto fd_algos_module = BindPrimitive(fd_module, py::overload_cast<>(&FDAlgorithm::FdList, py::const_), - "FdAlgorithm", "get_fds", + PFDTane>(fd_module, &FDAlgorithm::SortedFdList, "FdAlgorithm", "get_fds", {"HyFD", "Aid", "Depminer", "DFD", "FastFDs", "FDep", "FdMine", "FUN", kPyroName, kTaneName, kPFDTaneName});