diff --git a/.linters/cpp/cpplint.py b/.linters/cpp/cpplint.py index 5cc9b348eb4..c59d615509e 100755 --- a/.linters/cpp/cpplint.py +++ b/.linters/cpp/cpplint.py @@ -1286,11 +1286,11 @@ def Check(self, error, filename, linenum): # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... if error_level > 5: error_level = 5 - error(filename, linenum, 'readability/fn_size', error_level, - 'Small and focused functions are preferred:' - ' %s has %d non-comment lines' - ' (error triggered by exceeding %d lines).' % ( - self.current_function, self.lines_in_function, trigger)) + # error(filename, linenum, 'readability/fn_size', error_level, + # 'Small and focused functions are preferred:' + # ' %s has %d non-comment lines' + # ' (error triggered by exceeding %d lines).' % ( + # self.current_function, self.lines_in_function, trigger)) def End(self): """Stop analyzing function body.""" diff --git a/src/graph/CMakeLists.txt b/src/graph/CMakeLists.txt index 45a0bdbc33e..3761480298d 100644 --- a/src/graph/CMakeLists.txt +++ b/src/graph/CMakeLists.txt @@ -2,6 +2,7 @@ # # This source code is licensed under Apache 2.0 License. 
+ nebula_add_subdirectory(context) nebula_add_subdirectory(executor) nebula_add_subdirectory(optimizer) diff --git a/src/graph/executor/CMakeLists.txt b/src/graph/executor/CMakeLists.txt index 25da101a494..ecc261bdb15 100644 --- a/src/graph/executor/CMakeLists.txt +++ b/src/graph/executor/CMakeLists.txt @@ -5,6 +5,9 @@ nebula_add_library( executor_obj OBJECT Executor.cpp + subgraph_provenance/ceci.cpp + subgraph_provenance/graph.cpp + subgraph_provenance/subgraph.cpp StorageAccessExecutor.cpp logic/LoopExecutor.cpp logic/PassThroughExecutor.cpp @@ -90,4 +93,4 @@ nebula_add_library( mutate/UpdateExecutor.cpp ) -nebula_add_subdirectory(test) + diff --git a/src/graph/executor/algo/IsomorExecutor.cpp b/src/graph/executor/algo/IsomorExecutor.cpp index c20683e0f15..e9803c8df23 100644 --- a/src/graph/executor/algo/IsomorExecutor.cpp +++ b/src/graph/executor/algo/IsomorExecutor.cpp @@ -1,88 +1,181 @@ // Copyright (c) 2020 vesoft inc. All rights reserved. // // This source code is licensed under Apache 2.0 License. - #include "graph/executor/algo/IsomorExecutor.h" -#include "graph/planner/plan/Algo.h" +#include +#include +#include +#include "graph/executor/subgraph_provenance/graph.h" +#include "graph/executor/subgraph_provenance/subgraph.h" +#include "graph/planner/plan/Algo.h" namespace nebula { namespace graph { +static const char kDefaultProp[] = "default"; // + folly::Future IsomorExecutor::execute() { - // TODO: Replace the following codes with subgraph matching. Return type. 
- SCOPED_TIMER(&execTime_); - auto* subgraph = asNode(node()); - DataSet ds; - ds.colNames = subgraph->colNames(); - - uint32_t steps = subgraph->steps(); - const auto& currentStepVal = ectx_->getValue(subgraph->currentStepVar()); - DCHECK(currentStepVal.isInt()); - auto currentStep = currentStepVal.getInt(); - auto resultVar = subgraph->resultVar(); - - auto iter = ectx_->getResult(subgraph->inputVar()).iter(); - auto gnSize = iter->size(); - - ResultBuilder builder; - builder.value(iter->valuePtr()); - - std::unordered_map currentVids; - currentVids.reserve(gnSize); - historyVids_.reserve(historyVids_.size() + gnSize); - if (currentStep == 1) { - for (; iter->valid(); iter->next()) { - const auto& src = iter->getColumn(nebula::kVid); - currentVids.emplace(src, 0); - } - iter->reset(); - } - auto& biDirectEdgeTypes = subgraph->biDirectEdgeTypes(); - while (iter->valid()) { - const auto& dst = iter->getEdgeProp("*", nebula::kDst); - auto findIter = historyVids_.find(dst); - if (findIter != historyVids_.end()) { - if (biDirectEdgeTypes.empty()) { - iter->next(); - } else { - const auto& typeVal = iter->getEdgeProp("*", nebula::kType); - if (UNLIKELY(!typeVal.isInt())) { - iter->erase(); - continue; - } - auto type = typeVal.getInt(); - if (biDirectEdgeTypes.find(type) != biDirectEdgeTypes.end()) { - if (type < 0 || findIter->second + 2 == currentStep) { - iter->erase(); - } else { - iter->next(); - } - } else { - iter->next(); - } - } - } else { - if (currentStep == steps) { - iter->erase(); - continue; - } - if (currentVids.emplace(dst, currentStep).second) { - Row row; - row.values.emplace_back(std::move(dst)); - ds.rows.emplace_back(std::move(row)); - } - iter->next(); - } - } - iter->reset(); - builder.iter(std::move(iter)); - ectx_->setResult(resultVar, builder.build()); - // update historyVids - historyVids_.insert(std::make_move_iterator(currentVids.begin()), - std::make_move_iterator(currentVids.end())); - return 
finish(ResultBuilder().value(Value(std::move(ds))).build()); - } + // TODO: Replace the following codes with subgraph matching. Return type. + // Define 2: + SCOPED_TIMER(&execTime_); + auto* isomor = asNode(node()); + DataSet ds; + ds.colNames = isomor->colNames(); + auto iterDV = ectx_->getResult(isomor->getdScanVOut()).iter(); + auto iterQV = ectx_->getResult(isomor->getqScanVOut()).iter(); + auto iterDE = ectx_->getResult(isomor->getdScanEOut()).iter(); + auto iterQE = ectx_->getResult(isomor->getqScanEOut()).iter(); + unsigned int v_count = iterDV->size(); + unsigned int l_count = iterDV->size(); + unsigned int e_count = iterDE->size(); + // Example: + // Vetices 3: 0, 1, 2, 3 + // Edges: + // 0 1 + // + // To store the degree of each vertex + unsigned int* degree = new unsigned int[v_count]; + + // To store the starting position of each vertex in neighborhood array. + + unsigned int* offset = new unsigned int[v_count + 1]; + + // Array of the neighborhood can be initialized by 2 dimension of the matrix, + // However, here we use 2*edge count as we have in edge and out edges. 
+ unsigned int* neighbors = new unsigned int[e_count * 2]; + unsigned int* labels = new unsigned int[l_count]; + + // Initialize the degree for data graph + for (unsigned int i = 0; i < v_count; i++) { + degree[i] = 0; + } + + // load data vertices id and tags + while (iterDV->valid()) { + const auto vertex = iterDV->getColumn(nebula::kVid); // check if v is a vertex + auto v_id = vertex.getInt(); + const auto label = iterDV->getColumn(nebula::graph::kDefaultProp); // get label by index + auto l_id = label.getInt(); + // unsigned int v_id = (unsigned int)v.getInt(0); + labels[v_id] = l_id; // Tag Id + iterDV->next(); + } + + // load edges degree + while (iterDE->valid()) { + auto s = iterDE->getEdgeProp("*", kSrc); + unsigned int src = s.getInt(); + degree[src]++; + iterDE->next(); + } + + // caldulate the start position of each vertex in the neighborhood array + for (unsigned int i = 0; i < v_count; i++) { + offset[i + 1] += degree[i] + offset[i]; + } + + // load data edges + offset[0] = 0; + iterDE = ectx_->getResult(isomor->getdScanEOut()).iter(); + while (iterDE->valid()) { + unsigned int src = iterDE->getEdgeProp("*", kSrc).getInt(); + unsigned int dst = iterDE->getEdgeProp("*", kDst).getInt(); + + neighbors[offset[src + 1]] = dst; + offset[src + 1]++; + iterDE->next(); + } + for (unsigned int i = 0; i < v_count; i++) { + offset[i + 1] = offset[i]; + } + + Graph* data_graph = new Graph(); + data_graph->loadGraphFromExecutor(v_count, l_count, e_count, offset, neighbors, labels); + + // load query vertices id and tags + while (iterQV->valid()) { + const auto vertex = iterQV->getColumn(nebula::kVid); // check if v is a vertex + auto v_id = vertex.getInt(); + const auto label = iterQV->getColumn(nebula::graph::kDefaultProp); // get label by index + auto l_id = label.getInt(); + // unsigned int v_id = (unsigned int)v.getInt(0); + labels[v_id] = l_id; // Tag Id + iterQV->next(); + } + + // Initialize the degree for query graph + for (unsigned int i = 0; i < v_count; 
i++) { + degree[i] = 0; + } + + // load query edges degree + while (iterQE->valid()) { + auto s = iterQE->getEdgeProp("*", kSrc); + unsigned int src = s.getInt(); + offset[src]++; + iterDE->next(); + } + + // caldulate the start position of each vertex in the neighborhood array + for (unsigned int i = 0; i < v_count; i++) { + offset[i + 1] += offset[i]; + } + + // load query edges + offset[0] = 0; + iterQE = ectx_->getResult(isomor->getdScanEOut()).iter(); + while (iterDE->valid()) { + unsigned int src = iterQE->getEdgeProp("*", kSrc).getInt(); + unsigned int dst = iterQE->getEdgeProp("*", kDst).getInt(); + + neighbors[offset[src + 1]] = dst; + offset[src + 1]++; + iterQE->next(); + } + for (unsigned int i = 0; i < v_count; i++) { + offset[i + 1] = offset[i]; + } + + Graph* query_graph = new Graph(); + query_graph->loadGraphFromExecutor(v_count, l_count, e_count, offset, neighbors, labels); + + ui** candidates = nullptr; + ui* candidates_count = nullptr; + + TreeNode* ceci_tree = nullptr; + ui* ceci_order = nullptr; + ui* provenance = nullptr; + + std::vector>> + P_Candidates; // Parent, first branch, second branch. + std::vector>> P_Provenance; + // std::cout"Provenance Function: " << std::endl:endl; + + bool result = CECIFunction(data_graph, + query_graph, + candidates, + candidates_count, + ceci_order, + provenance, + ceci_tree, + P_Candidates, + P_Provenance); + delete data_graph; + delete query_graph; + delete[] ceci_order; + delete[] provenance; + delete[] candidates_count; + delete[] candidates; + delete ceci_tree; + + delete[] offset; + delete[] neighbors; + delete[] labels; + ResultBuilder builder; + // Set result in the ds and set the new column name for the (isomor matching 's) result. 
+ return finish(ResultBuilder().value(Value(std::move(result))).build()); +} } // namespace graph } // namespace nebula diff --git a/src/graph/executor/subgraph_provenance/.gitignore b/src/graph/executor/subgraph_provenance/.gitignore new file mode 100644 index 00000000000..6a3c664fb85 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/.gitignore @@ -0,0 +1,3 @@ +*.o +dataset/* +test/* diff --git a/src/graph/executor/subgraph_provenance/README.md b/src/graph/executor/subgraph_provenance/README.md new file mode 100644 index 00000000000..26f68e43701 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/README.md @@ -0,0 +1,6 @@ +# subgraph_provenance + + +`make all` + +`./ceci test/sample_dataset/query.graph test/sample_dataset/data.graph` diff --git a/src/graph/executor/subgraph_provenance/bitsetoperation.cpp b/src/graph/executor/subgraph_provenance/bitsetoperation.cpp new file mode 100644 index 00000000000..c26287fd734 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/bitsetoperation.cpp @@ -0,0 +1,186 @@ +// Copyright [2022] +// +// Get from Dr. Lemire. 
+// + +#include "bitsetoperation.h" + +void BitsetOperation::setBitsetList(void *bitset, const uint32_t *list, uint32_t length) { + uint64_t pos; + const uint32_t *end = list + length; + + uint64_t shift = 6; + uint64_t offset; + uint64_t load; + for (; list + 3 < end; list += 4) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [ load ] "=&r"(load), [ offset ] "=&r"(offset) + : [ bitset ] "r"(bitset), [ shift ] "r"(shift), [ pos ] "r"(pos)); + pos = list[1]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [ load ] "=&r"(load), [ offset ] "=&r"(offset) + : [ bitset ] "r"(bitset), [ shift ] "r"(shift), [ pos ] "r"(pos)); + pos = list[2]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [ load ] "=&r"(load), [ offset ] "=&r"(offset) + : [ bitset ] "r"(bitset), [ shift ] "r"(shift), [ pos ] "r"(pos)); + pos = list[3]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [ load ] "=&r"(load), [ offset ] "=&r"(offset) + : [ bitset ] "r"(bitset), [ shift ] "r"(shift), [ pos ] "r"(pos)); + } + + while (list != end) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [ load ] "=&r"(load), [ offset ] "=&r"(offset) + : [ bitset ] "r"(bitset), [ shift ] "r"(shift), [ pos ] "r"(pos)); + list++; + } +} + +void BitsetOperation::intersectBitsetWithBitset(const uint64_t *__restrict__ a, + const uint64_t *__restrict__ b, + uint64_t *__restrict__ output, 
+ const uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + output[i] = a[i] & b[i]; + } +} + +uint32_t BitsetOperation::intersectArrayWithBitset(const uint32_t *a, + const uint32_t a_count, + const uint64_t *b, + uint32_t *output) { + uint32_t count = 0; + for (uint32_t i = 0; i < a_count; ++i) { + uint32_t element = a[i]; + output[count] = element; + count += checkBitset(b, element); + } + return count; +} + +bool BitsetOperation::checkBitset(const uint64_t *bitset, uint32_t pos) { + uint64_t word = bitset[pos >> 6]; + const uint64_t p = pos; + __asm volatile("shrx %1, %0, %0" + : "+r"(word) + : /* read/write */ + "r"(p) /* read only */ + ); + return static_cast(word & 1); +} + +uint32_t BitsetOperation::intersectArrayWithArray( + const uint32_t *a, uint32_t a_count, const uint32_t *b, uint32_t b_count, uint32_t *output) { + uint32_t count; + const uint32_t skew_threshold = 32; + if (a_count * skew_threshold < b_count) { + count = skewIntersection(a, a_count, b, b_count, output); + } else if (b_count * skew_threshold < a_count) { + count = skewIntersection(b, b_count, a, a_count, output); + } else { + count = mergeIntersection(a, a_count, b, b_count, output); + } + return count; +} + +uint32_t BitsetOperation::mergeIntersection( + const uint32_t *a, uint32_t a_count, const uint32_t *b, uint32_t b_count, uint32_t *output) { + const uint32_t *initout = output; + if (a_count == 0 || b_count == 0) return 0; + const uint32_t *endA = a + a_count; + const uint32_t *endB = b + b_count; + + while (true) { + while (*a < *b) { +SKIP_FIRST_COMPARE: + if (++a == endA) return (output - initout); + } + while (*a > *b) { + if (++b == endB) return (output - initout); + } + if (*a == *b) { + *output++ = *a; + if (++a == endA || ++b == endB) return (output - initout); + } else { + goto SKIP_FIRST_COMPARE; + } + } + return (output - initout); // NOTREACHED +} + +uint32_t BitsetOperation::skewIntersection(const uint32_t *small, + uint32_t small_count, + const uint32_t 
*large, + uint32_t large_count, + uint32_t *output) { + uint32_t count = 0; + if (0 == small_count) { + return count; + } + + for (uint32_t i = 0; i < small_count; ++i) { + output[count] = small[i]; + count += binarySearch(large, 0, large_count, small[i]); + } + + return count; +} + +bool BitsetOperation::binarySearch(const uint32_t *src, + uint32_t begin, + uint32_t end, + uint32_t target) { + int32_t temp_begin = begin; + int32_t temp_end = end - 1; + while (temp_begin <= temp_end) { + int mid = temp_begin + ((temp_end - temp_begin) >> 1); + if (src[mid] > target) + temp_end = mid - 1; + else if (src[mid] < target) + temp_begin = mid + 1; + else + return true; + } + + return false; +} + +uint32_t BitsetOperation::extractBitset(const uint64_t *bitset, uint32_t length, uint32_t *output) { + int outpos = 0; + int base = 0; + for (uint32_t i = 0; i < length; ++i) { + uint64_t w = bitset[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + output[outpos++] = static_cast(r + base); + w ^= t; + } + base += 64; + } + return static_cast(outpos); +} diff --git a/src/graph/executor/subgraph_provenance/bitsetoperation.h b/src/graph/executor/subgraph_provenance/bitsetoperation.h new file mode 100644 index 00000000000..c9780fc89b5 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/bitsetoperation.h @@ -0,0 +1,36 @@ +// Copyright [2022] +// +// Get from Dr. Lemire. 
// Stateless collection of bitset and sorted-array set helpers.
// A bitset is an array of 64-bit words; position p is bit p % 64 of word p / 64.
class BitsetOperation {
 public:
  // ---- bitset primitives ----

  // Sets the bit of every position in list[0, length).
  static void setBitsetList(void *bitset, const uint32_t *list, uint32_t length);

  // Tests whether bit `pos` is set.
  static bool checkBitset(const uint64_t *bitset, uint32_t pos);

  // Writes the positions of all set bits of `length` words to `output`; returns their count.
  static uint32_t extractBitset(const uint64_t *bitset, uint32_t length, uint32_t *output);

  // ---- intersections ----

  // output[i] = a[i] & b[i] for `count` words.
  static void intersectBitsetWithBitset(const uint64_t *a,
                                        const uint64_t *b,
                                        uint64_t *output,
                                        uint32_t count);

  // Keeps the elements of `a` whose bit is set in bitset `b`; returns how many were kept.
  static uint32_t intersectArrayWithBitset(const uint32_t *a,
                                           uint32_t a_count,
                                           const uint64_t *b,
                                           uint32_t *output);

  // Array/array intersection; dispatches to one of the two strategies below.
  static uint32_t intersectArrayWithArray(
      const uint32_t *a, uint32_t a_count, const uint32_t *b, uint32_t b_count, uint32_t *output);

  // Linear merge of the two arrays.
  static uint32_t mergeIntersection(
      const uint32_t *a, uint32_t a_count, const uint32_t *b, uint32_t b_count, uint32_t *output);

  // Looks up every element of `small` in `large` by binary search.
  static uint32_t skewIntersection(const uint32_t *small,
                                   uint32_t small_count,
                                   const uint32_t *large,
                                   uint32_t large_count,
                                   uint32_t *output);

  // ---- search ----

  // True when `target` occurs in src[begin, end).
  static bool binarySearch(const uint32_t *src, uint32_t begin, uint32_t end, uint32_t target);
};
+// + +int ceci() { + std::string input_query_graph_file; // = argv[1]; + std::string input_data_graph_file; // = argv[2]; + + Graph* query_graph = new Graph(); + query_graph->loadGraph(input_query_graph_file); + + Graph* data_graph = new Graph(); + data_graph->loadGraph(input_data_graph_file); + + // std::cout"-----" << std::endl:endl; + // std::cout"Query Graph Meta Information" << std::endl:endl; + query_graph->printGraph(); + + // std::cout"-----" << std::endl:endl; + // data_graph->printGraph(); + + // std::cout"--------------------------------------------------------------------" << + // std::endl:endl; + + /** + * Start queries. + */ + + // std::cout"Start queries..." << std::endl:endl; + // std::cout"-----" << std::endl:endl; + + ui** candidates = nullptr; + ui* candidates_count = nullptr; + + TreeNode* ceci_tree = nullptr; + ui* ceci_order = nullptr; + ui* provenance = nullptr; + + std::vector>> + P_Candidates; // Parent, first branch, second branch. + std::vector>> P_Provenance; + // std::cout"Provenance Function: " << std::endl:endl; + + bool result = CECIFunction(data_graph, + query_graph, + candidates, + candidates_count, + ceci_order, + provenance, + ceci_tree, + P_Candidates, + P_Provenance); + // std::cout"Function End: " << std::endl:endl; + // std::vector>> TE_Candidates; // + // std::vector<> + + return result; +} diff --git a/src/graph/executor/subgraph_provenance/computesetintersection.cpp b/src/graph/executor/subgraph_provenance/computesetintersection.cpp new file mode 100644 index 00000000000..a8076a62234 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/computesetintersection.cpp @@ -0,0 +1,1147 @@ +// Copyright [2022] +// +// Created by ssunah on 11/30/17. 
+// + +#include "computesetintersection.h" + +#include + +void ComputeSetIntersection::ComputeCandidates(const V_ID *larray, + const ui l_count, + const V_ID *rarray, + const ui r_count, + V_ID *cn, + ui &cn_count) { +#if HYBRID == 0 +#if SI == 0 + if (l_count / 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGallopingAVX2(larray, l_count, rarray, r_count, cn, cn_count); + } else { + return ComputeCNMergeBasedAVX2(larray, l_count, rarray, r_count, cn, cn_count); + } +#elif SI == 1 + if (l_count / 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGallopingAVX512(larray, l_count, rarray, r_count, cn, cn_count); + } else { + return ComputeCNMergeBasedAVX512(larray, l_count, rarray, r_count, cn, cn_count); + } +#elif SI == 2 + if (l_count / 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGalloping(larray, l_count, rarray, r_count, cn, cn_count); + } else { + return ComputeCNNaiveStdMerge(larray, l_count, rarray, r_count, cn, cn_count); + } +#endif +#elif HYBRID == 1 +#if SI == 0 + return ComputeCNMergeBasedAVX2(larray, l_count, rarray, r_count, cn, cn_count); +#elif SI == 1 + return ComputeCNMergeBasedAVX512(larray, l_count, rarray, r_count, cn, cn_count); +#elif SI == 2 + return ComputeCNNaiveStdMerge(larray, l_count, rarray, r_count, cn, cn_count); +#endif +#endif +} + +void ComputeSetIntersection::ComputeCandidates( + const V_ID *larray, const ui l_count, const V_ID *rarray, const ui r_count, ui &cn_count) { +#if HYBRID == 0 +#if SI == 0 + if (l_count / 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGallopingAVX2(larray, l_count, rarray, r_count, cn_count); + } else { + return ComputeCNMergeBasedAVX2(larray, l_count, rarray, r_count, cn_count); + } +#elif SI == 1 + if (l_count / 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGallopingAVX512(larray, l_count, rarray, r_count, cn_count); + } else { + return ComputeCNMergeBasedAVX512(larray, l_count, rarray, r_count, cn_count); + } +#elif SI == 2 + if (l_count 
/ 50 > r_count || r_count / 50 > l_count) { + return ComputeCNGalloping(larray, l_count, rarray, r_count, cn_count); + } else { + return ComputeCNNaiveStdMerge(larray, l_count, rarray, r_count, cn_count); + } +#endif +#elif HYBRID == 1 +#if SI == 0 + return ComputeCNMergeBasedAVX2(larray, l_count, rarray, r_count, cn_count); +#elif SI == 1 + return ComputeCNMergeBasedAVX512(larray, l_count, rarray, r_count, cn_count); +#elif SI == 2 + return ComputeCNNaiveStdMerge(larray, l_count, rarray, r_count, cn_count); +#endif +#endif +} + +#if SI == 0 +void ComputeSetIntersection::ComputeCNGallopingAVX2(const V_ID *larray, + const ui l_count, + const V_ID *rarray, + const ui r_count, + V_ID *cn, + ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = GallopingSearchAVX2(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn[cn_count++] = larray[li]; + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNGallopingAVX2( + const V_ID *larray, const ui l_count, const V_ID *rarray, const ui r_count, ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = GallopingSearchAVX2(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn_count += 1; + li += 1; + ri += 1; + if (li >= 
lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNMergeBasedAVX2(const V_ID *larray, + const ui l_count, + const V_ID *rarray, + const ui r_count, + V_ID *cn, + ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + __m256i per_u_order = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0); + __m256i per_v_order = _mm256_set_epi32(3, 2, 1, 0, 3, 2, 1, 0); + V_ID *cur_back_ptr = cn; + + auto size_ratio = (rc) / (lc); + if (size_ratio > 2) { + if (li < lc && ri + 7 < rc) { + __m256i u_elements = _mm256_set1_epi32(larray[li]); + __m256i v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + + while (true) { + __m256i mask = _mm256_cmpeq_epi32(u_elements, v_elements); + auto real_mask = _mm256_movemask_epi8(mask); + if (real_mask != 0) { + // at most 1 element + *cur_back_ptr = larray[li]; + cur_back_ptr += 1; + } + if (larray[li] > rarray[ri + 7]) { + ri += 8; + if (ri + 7 >= rc) { + break; + } + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + } else { + li++; + if (li >= lc) { + break; + } + u_elements = _mm256_set1_epi32(larray[li]); + } + } + } + } else { + if (li + 1 < lc && ri + 3 < rc) { + __m256i u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + __m256i u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + __m256i v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + __m256i v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + + while (true) { + __m256i mask = _mm256_cmpeq_epi32(u_elements_per, v_elements_per); + auto real_mask = _mm256_movemask_epi8(mask); + if (real_mask << 16 != 0) { + *cur_back_ptr = larray[li]; + cur_back_ptr += 1; + } + if (real_mask >> 16 != 0) { + 
*cur_back_ptr = larray[li + 1]; + cur_back_ptr += 1; + } + + if (larray[li + 1] == rarray[ri + 3]) { + li += 2; + ri += 4; + if (li + 1 >= lc || ri + 3 >= rc) { + break; + } + u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + } else if (larray[li + 1] > rarray[ri + 3]) { + ri += 4; + if (ri + 3 >= rc) { + break; + } + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + } else { + li += 2; + if (li + 1 >= lc) { + break; + } + u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + } + } + } + } + + cn_count = (ui)(cur_back_ptr - cn); + if (li < lc && ri < rc) { + while (true) { + while (larray[li] < rarray[ri]) { + ++li; + if (li >= lc) { + return; + } + } + while (larray[li] > rarray[ri]) { + ++ri; + if (ri >= rc) { + return; + } + } + if (larray[li] == rarray[ri]) { + // write back + cn[cn_count++] = larray[li]; + + ++li; + ++ri; + if (li >= lc || ri >= rc) { + return; + } + } + } + } + return; +} + +void ComputeSetIntersection::ComputeCNMergeBasedAVX2( + const V_ID *larray, const ui l_count, const V_ID *rarray, const ui r_count, ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + constexpr int parallelism = 8; + + int cn_countv[parallelism] = {0, 0, 0, 0, 0, 0, 0, 0}; + __m256i sse_cn_countv = _mm256_load_si256(reinterpret_cast<__m256i *>(cn_countv)); + __m256i sse_countplus = _mm256_set1_epi32(1); + 
__m256i per_u_order = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0); + __m256i per_v_order = _mm256_set_epi32(3, 2, 1, 0, 3, 2, 1, 0); + + auto size_ratio = (rc) / (lc); + if (size_ratio > 2) { + if (li < lc && ri + 7 < rc) { + __m256i u_elements = _mm256_set1_epi32(larray[li]); + __m256i v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + + while (true) { + __m256i mask = _mm256_cmpeq_epi32(u_elements, v_elements); + mask = _mm256_and_si256(sse_countplus, mask); + sse_cn_countv = _mm256_add_epi32(sse_cn_countv, mask); + if (larray[li] > rarray[ri + 7]) { + ri += 8; + if (ri + 7 >= rc) { + break; + } + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + } else { + li++; + if (li >= lc) { + break; + } + u_elements = _mm256_set1_epi32(larray[li]); + } + } + _mm256_store_si256(reinterpret_cast<__m256i *> cn_countv, sse_cn_countv); + for (int cn_countvplus : cn_countv) { + cn_count += cn_countvplus; + } + } + } else { + if (li + 1 < lc && ri + 3 < rc) { + __m256i u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + __m256i u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + __m256i v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + __m256i v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + + while (true) { + __m256i mask = _mm256_cmpeq_epi32(u_elements_per, v_elements_per); + mask = _mm256_and_si256(sse_countplus, mask); + sse_cn_countv = _mm256_add_epi32(sse_cn_countv, mask); + + if (larray[li + 1] == rarray[ri + 3]) { + li += 2; + ri += 4; + if (li + 1 >= lc || ri + 3 >= rc) { + break; + } + u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + } else if (larray[li + 1] > rarray[ri + 3]) { 
+ ri += 4; + if (ri + 3 >= rc) { + break; + } + v_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(rarray + ri)); + v_elements_per = _mm256_permutevar8x32_epi32(v_elements, per_v_order); + } else { + li += 2; + if (li + 1 >= lc) { + break; + } + u_elements = _mm256_loadu_si256(reinterpret_cast<__m256i *>(larray + li)); + u_elements_per = _mm256_permutevar8x32_epi32(u_elements, per_u_order); + } + } + } + _mm256_store_si256(reinterpret_cast<__m256i *> cn_countv, sse_cn_countv); + for (int cn_countvplus : cn_countv) { + cn_count += cn_countvplus; + } + } + + if (li < lc && ri < rc) { + while (true) { + while (larray[li] < rarray[ri]) { + ++li; + if (li >= lc) { + return; + } + } + while (larray[li] > rarray[ri]) { + ++ri; + if (ri >= rc) { + return; + } + } + if (larray[li] == rarray[ri]) { + cn_count++; + ++li; + ++ri; + if (li >= lc || ri >= rc) { + return; + } + } + } + } + return; +} + +const ui ComputeSetIntersection::BinarySearchForGallopingSearchAVX2(const V_ID *array, + ui offset_beg, + ui offset_end, + ui val) { + while (offset_end - offset_beg >= 16) { + auto mid = static_cast((static_cast(offset_beg) + offset_end) / 2); + _mm_prefetch( + reinterpret_cast(&array[(static_cast(mid + 1) + offset_end) / 2]), + _MM_HINT_T0); + _mm_prefetch(reinterpret_cast(&array[(static_cast(offset_beg) + mid) / 2]), + _MM_HINT_T0); + if (array[mid] == val) { + return mid; + } else if (array[mid] < val) { + offset_beg = mid + 1; + } else { + offset_end = mid; + } + } + + // linear search fallback, be careful with operator>> && operation+ priority + __m256i pivot_element = _mm256_set1_epi32(val); + for (; offset_beg + 7 < offset_end; offset_beg += 8) { + __m256i elements = _mm256_loadu_si256(reinterpret_cast(array + offset_beg)); + __m256i cmp_res = _mm256_cmpgt_epi32(pivot_element, elements); + int mask = _mm256_movemask_epi8(cmp_res); + if (mask != 0xffffffff) { + return offset_beg + (_popcnt32(mask) >> 2); + } + } + if (offset_beg < offset_end) { + auto left_size = 
offset_end - offset_beg; + __m256i elements = _mm256_loadu_si256(reinterpret_cast(array + offset_beg)); + __m256i cmp_res = _mm256_cmpgt_epi32(pivot_element, elements); + int mask = _mm256_movemask_epi8(cmp_res); + int cmp_mask = 0xffffffff >> ((8 - left_size) << 2); + mask &= cmp_mask; + if (mask != cmp_mask) { + return offset_beg + (_popcnt32(mask) >> 2); + } + } + return offset_end; +} + +const ui ComputeSetIntersection::GallopingSearchAVX2(const V_ID *array, + ui offset_beg, + ui offset_end, + ui val) { + if (array[offset_end - 1] < val) { + return offset_end; + } + + // linear search + __m256i pivot_element = _mm256_set1_epi32(val); + if (offset_end - offset_beg >= 8) { + __m256i elements = _mm256_loadu_si256(reinterpret_cast(array + offset_beg)); + __m256i cmp_res = _mm256_cmpgt_epi32(pivot_element, elements); + int mask = _mm256_movemask_epi8(cmp_res); + if (mask != 0xffffffff) { + return offset_beg + (_popcnt32(mask) >> 2); + } + } else { + auto left_size = offset_end - offset_beg; + __m256i elements = _mm256_loadu_si256(reinterpret_cast(array + offset_beg)); + __m256i cmp_res = _mm256_cmpgt_epi32(pivot_element, elements); + int mask = _mm256_movemask_epi8(cmp_res); + int cmp_mask = 0xffffffff >> ((8 - left_size) << 2); + mask &= cmp_mask; + if (mask != cmp_mask) { + return offset_beg + (_popcnt32(mask) >> 2); + } + } + + // galloping, should add pre-fetch later + auto jump_idx = 8u; + while (true) { + auto peek_idx = offset_beg + jump_idx; + if (peek_idx >= offset_end) { + return BinarySearchForGallopingSearchAVX2( + array, (jump_idx >> 1) + offset_beg + 1, offset_end, val); + } + if (array[peek_idx] < val) { + jump_idx <<= 1; + } else { + return array[peek_idx] == val + ? 
peek_idx + : BinarySearchForGallopingSearchAVX2( + array, (jump_idx >> 1) + offset_beg + 1, peek_idx + 1, val); + } + } +} + +#elif SI == 1 +void ComputeSetIntersection::ComputeCNGallopingAVX512(const V_ID *larray, + const ui l_count, + const V_ID *rarray, + const ui r_count, + V_ID *cn, + ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = Utility::GallopingSearchAVX512(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn[cn_count++] = larray[li]; + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNGallopingAVX512( + const V_ID *larray, const ui l_count, const V_ID *rarray, const ui r_count, ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = Utility::GallopingSearchAVX512(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn_count += 1; + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNMergeBasedAVX512(const V_ID *larray, + const ui l_count, + const V_ID *rarray, + const ui r_count, + V_ID *cn, + ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + 
rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + __m512i st = _mm512_set_epi32(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0); + + V_ID *cur_back_ptr = cn; + + auto size1 = (rc) / (lc); + if (size1 > 2) { + if (li < lc && ri + 15 < rc) { + __m512i u_elements = _mm512_set1_epi32(larray[li]); + __m512i v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + + while (true) { + __mmask16 mask = _mm512_cmpeq_epi32_mask(u_elements, v_elements); + if (mask != 0x0000) { + // write back + _mm512_mask_compressstoreu_epi32(cur_back_ptr, mask, u_elements); + cur_back_ptr += _popcnt32(mask); + } + + if (larray[li] > rarray[ri + 15]) { + ri += 16; + if (ri + 15 >= rc) { + break; + } + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + } else { + li += 1; + if (li >= lc) { + break; + } + u_elements = _mm512_set1_epi32(larray[li]); + } + } + } + } else { + if (li + 3 < lc && ri + 3 < rc) { + __m512i u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + __m512i u_elements_per = _mm512_permutevar_epi32(st, u_elements); + __m512i v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + __m512i v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + + while (true) { + __mmask16 mask = _mm512_cmpeq_epi32_mask(u_elements_per, v_elements_per); + if (mask != 0x0000) { + // write back + _mm512_mask_compressstoreu_epi32(cur_back_ptr, mask, u_elements_per); + cur_back_ptr += _popcnt32(mask); + } + + if (larray[li + 3] > rarray[ri + 3]) { + ri += 4; + if (ri + 3 >= rc) { + break; + } + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + } else if (larray[li + 3] < rarray[ri + 3]) { + li += 4; + if (li + 3 >= lc) { + break; + } + u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + u_elements_per = 
_mm512_permutevar_epi32(st, u_elements); + } else { + li += 4; + ri += 4; + if (li + 3 >= lc || ri + 3 >= rc) { + break; + } + u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + u_elements_per = _mm512_permutevar_epi32(st, u_elements); + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + } + } + } + } + + cn_count = (ui)(cur_back_ptr - cn); + + if (li < lc && ri < rc) { + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + while (larray[li] > rarray[ri]) { + ri += 1; + if (ri >= rc) { + return; + } + } + if (larray[li] == rarray[ri]) { + // write back + cn[cn_count++] = larray[li]; + + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } + } + return; +} + +void ComputeSetIntersection::ComputeCNMergeBasedAVX512( + const V_ID *larray, const ui l_count, const V_ID *rarray, const ui r_count, ui &cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + constexpr int parallelism = 16; + __m512i st = _mm512_set_epi32(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0); + __m512i ssecountplus = _mm512_set1_epi32(1); + int cn_countv[parallelism] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + __m512i ssecn_countv = _mm512_set1_epi32(0); + auto size1 = (rc) / (lc); + + if (size1 > 2) { + if (li < lc && ri + 15 < rc) { + __m512i u_elements = _mm512_set1_epi32(larray[li]); + __m512i v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + + while (true) { + __mmask16 mask = _mm512_cmpeq_epi32_mask(u_elements, v_elements); + ssecn_countv = _mm512_mask_add_epi32(ssecn_countv, mask, ssecn_countv, ssecountplus); + + if (larray[li] > rarray[ri + 15]) { + 
ri += 16; + if (ri + 15 >= rc) { + break; + } + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + } else { + li += 1; + if (li >= lc) { + break; + } + u_elements = _mm512_set1_epi32(larray[li]); + } + } + _mm512_storeu_si512(reinterpret_cast<__m512i *> cn_countv, ssecn_countv); + for (int cn_countvplus : cn_countv) { + cn_count += cn_countvplus; + } + } + } else { + if (li + 3 < lc && ri + 3 < rc) { + __m512i u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + __m512i u_elements_per = _mm512_permutevar_epi32(st, u_elements); + __m512i v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + __m512i v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + + while (true) { + __mmask16 mask = _mm512_cmpeq_epi32_mask(u_elements_per, v_elements_per); + ssecn_countv = _mm512_mask_add_epi32(ssecn_countv, mask, ssecn_countv, ssecountplus); + + if (larray[li + 3] > rarray[ri + 3]) { + ri += 4; + if (ri + 3 >= rc) { + break; + } + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + } else if (larray[li + 3] < rarray[ri + 3]) { + li += 4; + if (li + 3 >= lc) { + break; + } + u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + u_elements_per = _mm512_permutevar_epi32(st, u_elements); + } else { + li += 4; + ri += 4; + if (li + 3 >= lc || ri + 3 >= rc) { + break; + } + u_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(larray + li)); + u_elements_per = _mm512_permutevar_epi32(st, u_elements); + v_elements = _mm512_loadu_si512(reinterpret_cast<__m512i *>(rarray + ri)); + v_elements_per = _mm512_permute4f128_epi32(v_elements, 0b00000000); + } + } + _mm512_storeu_si512(reinterpret_cast<__m512i *> cn_countv, ssecn_countv); + for (int cn_countvplus : cn_countv) { + cn_count += cn_countvplus; + } + } + } + + if (li < lc && ri < rc) { + while (true) { + while (larray[li] < 
rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + while (larray[li] > rarray[ri]) { + ri += 1; + if (ri >= rc) { + return; + } + } + if (larray[li] == rarray[ri]) { + cn_count += 1; + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } + } +} + +#elif SI == 2 +void ComputeSetIntersection::ComputeCNNaiveStdMerge(const V_ID* larray, + const ui l_count, + const V_ID* rarray, + const ui r_count, + V_ID* cn, + ui& cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + if (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } else if (larray[li] > rarray[ri]) { + ri += 1; + if (ri >= rc) { + return; + } + } else { + cn[cn_count++] = larray[li]; + + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNNaiveStdMerge( + const V_ID* larray, const ui l_count, const V_ID* rarray, const ui r_count, ui& cn_count) { + cn_count = 0; + + if (l_count == 0 || r_count == 0) return; + + ui lc = l_count; + ui rc = r_count; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + if (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } else if (larray[li] > rarray[ri]) { + ri += 1; + if (ri >= rc) { + return; + } + } else { + cn_count += 1; + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNGalloping(const V_ID* larray, + const ui l_count, + const V_ID* rarray, + const ui r_count, + V_ID* cn, + ui& cn_count) { + ui lc = l_count; + ui rc = r_count; + cn_count = 0; + if (lc == 0 || rc == 0) return; + + if (lc > rc) { + auto tmp = 
larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = GallopingSearch(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn[cn_count++] = larray[li]; + + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +void ComputeSetIntersection::ComputeCNGalloping( + const V_ID* larray, const ui l_count, const V_ID* rarray, const ui r_count, ui& cn_count) { + ui lc = l_count; + ui rc = r_count; + cn_count = 0; + if (lc == 0 || rc == 0) return; + + if (lc > rc) { + auto tmp = larray; + larray = rarray; + rarray = tmp; + + ui tmp_count = lc; + lc = rc; + rc = tmp_count; + } + + ui li = 0; + ui ri = 0; + + while (true) { + while (larray[li] < rarray[ri]) { + li += 1; + if (li >= lc) { + return; + } + } + + ri = GallopingSearch(rarray, ri, rc, larray[li]); + if (ri >= rc) { + return; + } + + if (larray[li] == rarray[ri]) { + cn_count += 1; + + li += 1; + ri += 1; + if (li >= lc || ri >= rc) { + return; + } + } + } +} + +ui ComputeSetIntersection::GallopingSearch(const V_ID* src, + const ui begin, + const ui end, + const ui target) { + if (src[end - 1] < target) { + return end; + } + // galloping + if (src[begin] >= target) { + return begin; + } + if (src[begin + 1] >= target) { + return begin + 1; + } + if (src[begin + 2] >= target) { + return begin + 2; + } + + ui jump_idx = 4; + ui offset_beg = begin; + while (true) { + ui peek_idx = offset_beg + jump_idx; + if (peek_idx >= end) { + return BinarySearch(src, (jump_idx >> 1) + offset_beg + 1, end, target); + } + if (src[peek_idx] < target) { + jump_idx <<= 1; + } else { + return src[peek_idx] == target + ? 
peek_idx + : BinarySearch(src, (jump_idx >> 1) + offset_beg + 1, peek_idx + 1, target); + } + } +} + +ui ComputeSetIntersection::BinarySearch(const V_ID* src, + const ui begin, + const ui end, + const ui target) { + int offset_begin = begin; + int offset_end = end; + while (offset_end - offset_begin >= 16) { + auto mid = static_cast((static_cast(offset_begin) + offset_end) / 2); + _mm_prefetch(reinterpret_cast(&src[(mid + 1 + offset_end) / 2]), _MM_HINT_T0); + _mm_prefetch(reinterpret_cast(&src[(mid - 1 + offset_begin) / 2]), _MM_HINT_T0); + if (src[mid] == target) { + return mid; + } else if (src[mid] < target) { + offset_begin = mid + 1; + } else { + offset_end = mid; + } + } + + // linear search fallback + for (auto offset = offset_begin; offset < offset_end; ++offset) { + if (src[offset] >= target) { + return static_cast(offset); + } + } + + return static_cast(offset_end); +} +#endif diff --git a/src/graph/executor/subgraph_provenance/computesetintersection.h b/src/graph/executor/subgraph_provenance/computesetintersection.h new file mode 100644 index 00000000000..db7c70a1f78 --- /dev/null +++ b/src/graph/executor/subgraph_provenance/computesetintersection.h @@ -0,0 +1,83 @@ +// Copyright [2022] +// +// Created by ssunah on 11/30/17. +// + +#ifndef SUBGRAPHMATCHING_COMPUTE_SET_INTERSECTION_H +#define SUBGRAPHMATCHING_COMPUTE_SET_INTERSECTION_H + +#include +#include + +#include "config.h" +#include "graph.h" + +/* + * Because the set intersection is designed for computing common neighbors, the target is uieger. 
+ */
+
+class ComputeSetIntersection {  // Static kernels that intersect two V_ID arrays.
+ public:
+#if HYBRID == 0  // config.h: 0 selects the hybrid method
+  static size_t galloping_cnt_;
+  static size_t merge_cnt_;
+#endif  // HYBRID == 0
+
+  static void ComputeCandidates(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, V_ID* cn, ui& cn_count);
+  static void ComputeCandidates(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, ui& cn_count);
+
+#if SI == 0  // AVX2 kernels (config.h: SI 0)
+  static void ComputeCNGallopingAVX2(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, V_ID* cn, ui& cn_count);
+  static void ComputeCNGallopingAVX2(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, ui& cn_count);
+
+  static void ComputeCNMergeBasedAVX2(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, V_ID* cn, ui& cn_count);
+  static void ComputeCNMergeBasedAVX2(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, ui& cn_count);
+  static const ui BinarySearchForGallopingSearchAVX2(const V_ID* array,
+                                                     ui offset_beg,
+                                                     ui offset_end,
+                                                     ui val);
+  static const ui GallopingSearchAVX2(const V_ID* array, ui offset_beg, ui offset_end, ui val);
+#elif SI == 1  // AVX-512 kernels (config.h: SI 1)
+
+  static void ComputeCNGallopingAVX512(const V_ID* larray,
+                                       const ui l_count,
+                                       const V_ID* rarray,
+                                       const ui r_count,
+                                       V_ID* cn,
+                                       ui& cn_count);
+  static void ComputeCNGallopingAVX512(
+      const V_ID* larray, const ui l_count, const V_ID* rarray, const ui r_count, ui& cn_count);
+
+  static void ComputeCNMergeBasedAVX512(const V_ID* larray,
+                                        const ui l_count,
+                                        const V_ID* rarray,
+                                        const ui r_count,
+                                        V_ID* cn,
+                                        ui& cn_count);
+  static void ComputeCNMergeBasedAVX512(
+      const V_ID* larray, const ui l_count, const V_ID* rarray, const ui r_count, ui& cn_count);
+
+#elif SI == 2  // scalar ("Basic") kernels (config.h: SI 2)
+
+  static void ComputeCNNaiveStdMerge(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, V_ID* cn, ui& cn_count);
+  static void ComputeCNNaiveStdMerge(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, ui& cn_count);
+
+  static void ComputeCNGalloping(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, V_ID* cn, ui& cn_count);
+  static void ComputeCNGalloping(
+      const V_ID* larray, ui l_count, const V_ID* rarray, ui r_count, ui& cn_count);
+  static ui GallopingSearch(const V_ID* src, ui begin, ui end, ui target);
+  static ui BinarySearch(const V_ID* src, ui begin, ui end, ui target);
+
+#endif
+};
+
+#endif  // SUBGRAPHMATCHING_COMPUTE_SET_INTERSECTION_H
diff --git a/src/graph/executor/subgraph_provenance/config.h b/src/graph/executor/subgraph_provenance/config.h
new file mode 100755
index 00000000000..ac99606d305
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/config.h
@@ -0,0 +1,58 @@
+// Copyright [2022]
+//
+// Created by ssunah on 6/22/18.
+//
+
+#ifndef SUBGRAPHMATCHING_CONFIG_H
+#define SUBGRAPHMATCHING_CONFIG_H
+
+/**
+ * Set the maximum size of a query graph. By default, we set the value as 64.
+ */
+#define MAXIMUM_QUERY_GRAPH_SIZE 64
+
+/**
+ * Setting the value as 1 is to (1) enable the neighbor label frequency filter (i.e., NLF filter);
+ * and (2) enable checking the existence of an edge with the label information. The cost is to (1)
+ * build an unordered_map for each vertex to store the frequency of the labels of its neighbor; and
+ * (2) build the label neighbor offset. If the memory can hold the extra memory cost, then enable
+ * this feature to boost the performance. Otherwise, disable it by setting this value as 0.
+ */
+#define OPTIMIZED_LABELED_GRAPH 1
+
+/**
+ * Define SPECTRUM to enable spectrum analysis.
+ */
+// #define SPECTRUM
+
+/**
+ * Set intersection method.
+ * 0: Hybrid method; 1: Merge based set intersections.
+ */
+#define HYBRID 0
+
+/**
+ * Accelerate set intersection with SIMD instructions.
+ * 0: AVX2; 1: AVX512; 2: Basic;
+ */
+#define SI 2
+
+/**
+ * Define ENABLE_QFLITER to enable the QFilter set intersection method.
+ */
+// #define ENABLE_QFLITER 1
+
+/**
+ * Define ENABLE_FAILING_SET to enable the failing set pruning set intersection method.
+ */
+// #define ENABLE_FAILING_SET
+
+/**
+ * Enable collecting the distribution of the results in the data graph.
+ */
+
+// #define DISTRIBUTION
+
+#define PRINT_SEPARATOR "------------------------------"
+
+#endif  // SUBGRAPHMATCHING_CONFIG_H
diff --git a/src/graph/executor/subgraph_provenance/graph.cpp b/src/graph/executor/subgraph_provenance/graph.cpp
new file mode 100644
index 00000000000..c9df47c8081
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/graph.cpp
@@ -0,0 +1,223 @@
+// Copyright [2022]
+#include "graph.h"
+
+#include
+#include
+#include
+#include
+
+// Print the summary counters and every vertex's adjacency list to stdout.
+void Graph::printGraph() {
+  std::cout << "|V|: " << v_count << ", |E|: " << e_count << ", |L|: " << l_count << std::endl;
+  std::cout << "Max Degree: " << max_degree << ", Max Label Frequency: " << max_label_frequency
+            << std::endl;
+  printf("Neighbourhood: ");
+  // for (int i = 0; i < v_count; i ++) {
+  // printf("%d, ", offsets[i]);
+  //
+  // }
+  printf("\n");
+  for (ui i = 0; i < v_count; i++) {
+    printf("V_ID: %d, offsets: %d:", i, offsets[i]);
+    for (ui j = offsets[i]; j < offsets[i + 1]; j++) {
+      printf("%d, ", neighbors[j]);
+      std::cout << " (" << getVertexLabel(neighbors[j]) << ") ";
+    }
+    printf("\n");
+  }
+
+  printf("\n");
+}
+
+// Group vertex ids by label. Afterwards the vertices labelled l are stored at
+// reverse_index[reverse_index_offsets[l]] .. reverse_index[reverse_index_offsets[l + 1] - 1].
+void Graph::BuildReverseIndex() {
+  reverse_index = new ui[v_count];
+  reverse_index_offsets = new ui[l_count + 1];
+  reverse_index_offsets[0] = 0;
+
+  ui total = 0;
+  for (ui i = 0; i < l_count; ++i) {
+    reverse_index_offsets[i + 1] = total;
+    total += labels_frequency[i];
+  }
+
+  for (ui i = 0; i < v_count; ++i) {
+    L_ID label = labels[i];
+    reverse_index[reverse_index_offsets[label + 1]++] = i;
+  }
+}
+
+// For every vertex build nlf (neighbor label -> number of neighbors carrying it) and
+// nl (neighbor label -> ascending list of those neighbors).
+void Graph::BuildNLCF() {  // neighbors count
+  // Map from [label_id] -> count of this label count value.
+  nlf = new std::unordered_map[v_count];
+  nl = new std::unordered_map>[v_count];
+  for (ui i = 0; i < v_count; ++i) {
+    ui count;
+    V_ID *neighbors_tmp = getVertexNeighbors(i, count);
+    // count is the number of the offsets[id + 1] - offsets[id]; which is the number of neighbours.
+    for (ui j = 0; j < count; ++j) {
+      V_ID u = neighbors_tmp[j];
+      L_ID label = getVertexLabel(u);
+      // If not found, this should be 0;
+      if (nlf[i].find(label) == nlf[i].end()) {
+        nlf[i][label] = 0;
+        nl[i].emplace(label, std::vector());
+      }
+      nlf[i][label] += 1;  // Add one after each count.
+      nl[i][label].push_back(u);
+    }
+    for (auto &iter : nl[i]) {
+      sort(iter.second.begin(), iter.second.end());
+    }
+  }
+  return;
+}
+
+// Populate the graph from arrays handed over by the executor instead of a file.
+// `off` is read for v_c + 1 entries, `lab` for v_c entries and `nei` for e_c entries.
+// NOTE(review): 2 * e_c neighbor slots are allocated but only e_c are copied -- confirm
+// whether e_c already counts both directions of every edge.
+void Graph::loadGraphFromExecutor(unsigned int v_c,
+                                  unsigned int l_c,
+                                  unsigned int e_c,
+                                  unsigned int *off,
+                                  unsigned int *nei,
+                                  unsigned int *lab) {
+  this->v_count = v_c;
+  this->l_count = l_c;
+  this->e_count = e_c;
+  this->offsets = new ui[v_c + 1];
+  this->neighbors = new V_ID[e_c * 2];
+  this->labels = new L_ID[v_c];
+
+  for (ui i = 0; i < v_c + 1; ++i) {
+    this->offsets[i] = off[i];
+  }
+
+  // Track the largest adjacency-list length, as loadGraph() does.
+  max_degree = 0;
+  for (ui i = 0; i < v_c; ++i) {
+    ui degree = off[i + 1] - off[i];
+    if (degree > max_degree) {
+      max_degree = degree;
+    }
+  }
+
+  for (ui i = 0; i < e_c; ++i) {
+    this->neighbors[i] = nei[i];
+  }
+
+  L_ID max_label_id = 0;
+  for (ui i = 0; i < v_count; ++i) {
+    this->labels[i] = lab[i];
+    if (labels_frequency.find(lab[i]) == labels_frequency.end()) {
+      labels_frequency[lab[i]] = 0;
+      if (lab[i] > max_label_id) {
+        max_label_id = lab[i];
+      }
+    }
+    // Count this vertex: BuildReverseIndex() derives its per-label offsets from these counts.
+    labels_frequency[lab[i]] += 1;
+  }
+  // Initialize label count;
+  if (labels_frequency.size() > max_label_id + 1) {
+    l_count = labels_frequency.size();
+  } else {
+    l_count = max_label_id + 1;
+  }
+
+  // Mirror loadGraph(): record the largest label frequency.
+  for (const auto &item : labels_frequency) {
+    if (item.second > max_label_frequency) {
+      max_label_frequency = item.second;
+    }
+  }
+
+  BuildReverseIndex();
+  BuildNLCF();
+  return;
+}
+
+// Load a graph from a text file: one header record (type, |V|, |E|) followed by
+// "v id label degree" and "e src dst" records. Exits the process if the file cannot be opened.
+void Graph::loadGraph(const std::string &file_path) {
+  std::ifstream graphFile(file_path);
+
+  if (!graphFile.is_open()) {
+    std::cout << "Error opening " << file_path << " ." << std::endl;
+    exit(-1);
+  }
+
+  char type;
+  graphFile >> type >> v_count >> e_count;
+  offsets = new ui[v_count + 1];
+  offsets[0] = 0;
+
+  neighbors = new V_ID[e_count * 2];
+  labels = new L_ID[v_count];
+  l_count = 0;
+  max_degree = 0;
+
+  L_ID max_label_id = 0;
+  std::vector neighbors_offsets(v_count, 0);
+  while (graphFile >> type) {
+    if (type == 'v') {  // Read Vertex, build index of id->label, records its degree.
+      V_ID id;
+      L_ID label;
+      ui degree;
+      graphFile >> id >> label >> degree;
+
+      labels[id] = label;
+      offsets[id + 1] = offsets[id] + degree;
+      if (degree > max_degree) {
+        max_degree = degree;
+      }
+
+      if (labels_frequency.find(label) == labels_frequency.end()) {
+        labels_frequency[label] = 0;
+        if (label > max_label_id) {
+          max_label_id = label;
+        }
+      }
+      labels_frequency[label] += 1;
+    } else if (type == 'e') {  // Read edge.
+      V_ID src;
+      V_ID dst;
+      graphFile >> src >> dst;
+
+      ui offset_tmp = offsets[src] + neighbors_offsets[src];
+      neighbors[offset_tmp] = dst;
+
+      offset_tmp = offsets[dst] + neighbors_offsets[dst];
+      neighbors[offset_tmp] = src;
+
+      neighbors_offsets[src] += 1;
+      neighbors_offsets[dst] += 1;
+    }
+  }
+
+  graphFile.close();
+
+  // Initialize label count;
+  if (labels_frequency.size() > max_label_id + 1) {
+    l_count = labels_frequency.size();
+  } else {
+    l_count = max_label_id + 1;
+  }
+
+  // Recheck the max_label_id;
+  // std::cout"Test All: " << std::endl;
+
+  for (auto item : labels_frequency) {
+    // std::coutitem.second << " ";
+    if (item.second > max_label_frequency) {
+      max_label_frequency = item.second;
+    }
+  }
+
+  BuildReverseIndex();
+  BuildNLCF();
+}
diff --git a/src/graph/executor/subgraph_provenance/graph.h b/src/graph/executor/subgraph_provenance/graph.h
new file mode 100644
index 00000000000..885e365318c
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/graph.h
@@ -0,0 +1,155 @@
+// Copyright [2022]
+#ifndef CECI_GRAPH_H
+#define CECI_GRAPH_H
+#include
+#include
+#include
+
+#include "trees.h"
+
+// Labeled graph held as offsets/neighbors adjacency arrays, plus a per-label vertex index
+// (reverse_index) and per-vertex neighbor-label maps (nlf, nl). Owns all of its arrays.
+class Graph {
+ private:
+  ui v_count;  // vertices count
+  ui e_count;  // edges count
+  ui l_count;  // Label count
+  ui max_degree;
+  ui max_label_frequency;
+
+  ui* offsets;
+
+  V_ID* neighbors;
+  L_ID* labels;
+
+  ui* reverse_index_offsets;
+  ui* reverse_index;
+
+  std::unordered_map labels_frequency;
+  // ui* labels_offsets;
+  std::unordered_map* nlf;
+  std::unordered_map>* nl;
+
+ private:
+  void BuildReverseIndex();
+  void BuildNLCF();
+  void BuildLabelOffset();
+
+ public:
+  Graph() {
+    v_count = 0;  // vertices count
+    e_count = 0;  // edges count
+    l_count = 0;  // Label count
+    max_degree = 0;
+    max_label_frequency = 0;
+
+    offsets = nullptr;
+    reverse_index_offsets = nullptr;
+    reverse_index = nullptr;
+    neighbors = nullptr;
+    labels = nullptr;
+
+    labels_frequency.clear();
+    // labels_offsets = nullptr;
+    nlf = nullptr;
+    nl = nullptr;
+  }
+  ~Graph() {
+    delete[] offsets;
+    delete[] neighbors;
+    delete[] reverse_index_offsets;
+    delete[] reverse_index;
+    delete[] labels;
+    // delete[] labels_offsets;
+    delete[] nlf;
+    delete[] nl;
+  }
+
+ public:
+  void loadGraph(const std::string& file_path);
+  void loadGraphFromExecutor(unsigned int v_count,
+                             unsigned int l_count,
+                             unsigned int e_count,
+                             unsigned int* offset,
+                             unsigned int* neighbors,
+                             unsigned int* labels);
+
+  void printGraph();
+
+  ui getLabelsCount() {
+    return l_count;
+  }
+
+  ui getGraphMaxLabelFrequency() {
+    return max_label_frequency;
+  }
+
+  ui* getVerticesByLabel(const L_ID id, ui& count) const {
+    count = reverse_index_offsets[id + 1] - reverse_index_offsets[id];
+    return reverse_index + reverse_index_offsets[id];
+  }
+
+  ui getVertexDegree(const V_ID id) const {
+    return offsets[id + 1] - offsets[id];
+  }
+
+  ui getVerticesCount() {
+    return v_count;
+  }
+
+  ui getEdgesCount() {
+    return e_count;
+  }
+
+  ui getMaxDegree() {
+    return max_degree;
+  }
+
+  L_ID getVertexLabel(const V_ID id) {
+    return labels[id];
+  }
+
+  ui* getVertexNeighbors(const V_ID id, ui& count) {
+    count = offsets[id + 1] - offsets[id];
+    return neighbors + offsets[id];
+  }
+
+  // Return true when u and v are adjacent, by binary-searching the shorter adjacency list.
+  // NOTE(review): this assumes each adjacency list is sorted ascending; the loaders in
+  // graph.cpp do not sort -- confirm the input ordering.
+  bool checkEdgeExistence(V_ID u, V_ID v) {
+    if (getVertexDegree(u) < getVertexDegree(v)) {
+      std::swap(u, v);
+    }
+
+    ui count = 0;
+    // Keep the list in a local: assigning it to the `neighbors` member would repoint the
+    // whole adjacency array at v's sub-list and break every later lookup (and delete[]).
+    const V_ID* nbrs = getVertexNeighbors(v, count);
+
+    int begin = 0;
+    int end = count - 1;
+    while (begin <= end) {
+      int mid = begin + ((end - begin) >> 1);
+      if (nbrs[mid] == u) {
+        return true;
+      } else if (nbrs[mid] > u) {
+        end = mid - 1;
+      } else {
+        begin = mid + 1;
+      }
+    }
+
+    return false;
+  }
+
+  std::unordered_map* getVertexNLF(V_ID i) {
+    return nlf + i;
+  }
+
+  std::unordered_map>* getVertexNL(V_ID i) {
+    return nl + i;
+  }
+};
+
+#endif
diff --git a/src/graph/executor/subgraph_provenance/run.bash b/src/graph/executor/subgraph_provenance/run.bash
new file mode 100755
index 00000000000..207b05d6dad
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/run.bash
@@ -0,0 +1,27 @@
+#!/bin/bash
+type=3
+distributionType=0
+testCount=5
+startPower=22
+stopPower=29
+N=30
+#k=33554431
+beta=2
+declare -a arr=("result.out" "baseline_filter_shuffle" "baseline_filter" "baseline" "U_K16" "U_K31" "U_K64" "U_K128" "U_K256" "U_K512")
+rm output.txt
+#declare -a k=(1 2 4 8 16 31 64 128 256 512)
+#for (( beta=2; beta<3; beta=beta+1 ))
+for (( k=1; k<=1; k=k+1 ))
+do
+    for (( N=1; N<=200; N=N+1 ))
+    do
+        ./ceci dataset/youtube/query_graph/query_dense_8_$N.graph dataset/youtube/data_graph/youtube.graph #>> ${arr[0]}
+        echo -n "Finished"
+        echo $N
+        #./topk.bin 29 33554431 8 2
+    done
+done
+echo -n "Finihed processing for a k "
+printf "\n"
+#diff <(grep ':' result.out | sed 's/^.*://') <(grep ':' truth.out | sed 's/^.*://')
+
diff --git a/src/graph/executor/subgraph_provenance/subgraph.cpp b/src/graph/executor/subgraph_provenance/subgraph.cpp
new file mode 100644
index 00000000000..423b65ee43f
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/subgraph.cpp
@@ -0,0 +1,4576 @@
+// Copyright [2022]
+#include "subgraph.h"
+
+#include
+#include
+
+#include
+#include
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "computesetintersection.h" +#include "trees.h" +#define INVALID_VERTEX_ID 99999 +#undef DEBUG +#define DEBUG 0 +#define Q_LIMIT (((int64_t)1) << 31) + +V_ID InitialStartVertex(Graph *data_graph, Graph *query_graph) { + // Using method of finding minimum scores = count/degree; + double min_score = data_graph->getVerticesCount(); + V_ID start_vertex = 0; + + for (ui i = 0; i < query_graph->getVerticesCount(); ++i) { + ui degree = query_graph->getVertexDegree(i); + ui count = 0; + // Compare the NLF with data graoh and query graph. + ZFComputeNLF(data_graph, query_graph, i, count); + double cur_score = count / static_cast(degree); + if (cur_score < min_score && count > 1) { + min_score = cur_score; + start_vertex = i; + } + } + return start_vertex; +} + +void bfs2(Graph *graph, + V_ID root_v, + TreeNode *&tree, + V_ID *&order, + __attribute__((unused)) V_ID *&provenance) { + // How many vertices. + ui count = graph->getVerticesCount(); + std::queue queue; + std::vector visited(count, false); + // Initialize the tree + tree = new TreeNode[count]; + for (ui i = 0; i < count; ++i) { + tree[i].initialize(count); + } + // This is + order = new V_ID[count]; + ui visited_count = 0; // how many nodes have been visited. + queue.push(root_v); // push the first root node into the BFS queue. + visited[root_v] = true; + tree[root_v].level = 0; + tree[root_v].id = root_v; // node id. 
+ order[visited_count++] = root_v; + + ui u_nbrs_count_1; + // How many neighbors of the node + // get the neighbors informations + V_ID *u_nbrs_1 = graph->getVertexNeighbors(root_v, u_nbrs_count_1); + std::vector> tmp_2; + for (ui i = 0; i < u_nbrs_count_1; ++i) { + V_ID u_nbr = u_nbrs_1[i]; + ui u_nbrs_count; + __attribute__((unused)) V_ID *u_nbrs = graph->getVertexNeighbors(u_nbr, u_nbrs_count); + tmp_2.emplace_back(u_nbrs_count, u_nbr); + } + + // Second node -> + sort(tmp_2.begin(), tmp_2.end()); + ui u_nbr = tmp_2[tmp_2.size() - 1].second; + + if (!visited[u_nbr]) { + visited[u_nbr] = true; + tree[u_nbr].id = u_nbr; + tree[u_nbr].parent = root_v; + tree[u_nbr].level = tree[root_v].level + 1; + tree[root_v].children[tree[root_v].children_count++] = u_nbr; + order[visited_count++] = u_nbr; + } + // Third node -> + if (count > 2) { + u_nbr = tmp_2[tmp_2.size() - 2].second; + + if (!visited[u_nbr]) { + visited[u_nbr] = true; + tree[u_nbr].id = u_nbr; + tree[u_nbr].parent = root_v; + tree[u_nbr].level = tree[root_v].level + 1; + tree[root_v].children[tree[root_v].children_count++] = u_nbr; + order[visited_count++] = u_nbr; + } + } + bool next = true; + while (next) { + std::vector> tmp; + next = false; + for (ui i = 0; i < count; ++i) { + if (visited[i] == false) { + next = true; + ui u_nbrs_count; + V_ID *u_nbrs = graph->getVertexNeighbors(i, u_nbrs_count); + ui u_count = 0; + for (ui j = 0; j < u_nbrs_count; j++) { + if (visited[u_nbrs[j]] == true) { + u_count += 1; + } + } + tmp.emplace_back(u_count, i); + } + } + if (next) { + sort(tmp.begin(), tmp.end()); + ui u_t = tmp[tmp.size() - 1].second; + order[visited_count++] = u_t; + visited[u_t] = true; + ui u_nbrs_count; + V_ID *u_nbrs = graph->getVertexNeighbors(u_t, u_nbrs_count); + // ui u_count = 0; + bool find_child = false; + for (ui i = 0; i < visited_count; i++) { + for (ui j = 0; j < u_nbrs_count; j++) { + if (order[i] == u_nbrs[j]) { + find_child = true; + tree[u_t].id = u_t; + tree[u_t].parent = 
order[i]; + tree[u_t].level = tree[order[i]].level + 1; + tree[order[i]].children[tree[order[i]].children_count++] = u_t; + break; + } + } + if (find_child) { + break; + } + } + } + } +} + +void generateValidCandidates(Graph *data_graph, + ui depth, + ui *embedding, + ui *idx_count, + ui **valid_candidate, + bool *visited_vertices, + ui **bn, + ui *bn_cnt, + ui *order, + ui **candidates, + ui *candidates_count) { + V_ID u = order[depth]; + idx_count[depth] = 0; + for (ui i = 0; i < candidates_count[u]; ++i) { + V_ID v = candidates[u][i]; + + if (!visited_vertices[v]) { + bool valid = true; + + for (ui j = 0; j < bn_cnt[depth]; ++j) { + V_ID u_nbr = bn[depth][j]; + V_ID u_nbr_v = embedding[u_nbr]; + + if (!data_graph->checkEdgeExistence(v, u_nbr_v)) { + valid = false; + break; + } + } + + if (valid) { + valid_candidate[depth][idx_count[depth]++] = v; + } + } + } +} + +int64_t exploreGraphQLStyle(Graph *data_graph, + Graph *query_graph, + ui **candidates, + ui *candidates_count, + ui *order, + size_t output_limit_num, + int64_t &call_count) { + uint64_t embedding_cnt = 0; + int cur_depth = 0; + int max_depth = query_graph->getVerticesCount(); + V_ID start_vertex = order[0]; + + // Generate the bn. + ui **bn; + ui *bn_count; + + bn = new ui *[max_depth]; + for (int i = 0; i < max_depth; ++i) { + bn[i] = new ui[max_depth]; + } + + bn_count = new ui[max_depth]; + std::fill(bn_count, bn_count + max_depth, 0); + + std::vector visited_query_vertices(max_depth, false); + visited_query_vertices[start_vertex] = true; + for (int i = 1; i < max_depth; ++i) { + V_ID cur_vertex = order[i]; + ui nbr_cnt; + V_ID *nbrs = query_graph->getVertexNeighbors(cur_vertex, nbr_cnt); + + for (ui j = 0; j < nbr_cnt; ++j) { + V_ID nbr = nbrs[j]; + + if (visited_query_vertices[nbr]) { + bn[i][bn_count[i]++] = nbr; + } + } + + visited_query_vertices[cur_vertex] = true; + } + + // Allocate the memory buffer. 
+ ui *idx; + ui *idx_count; + ui *embedding; + V_ID **valid_candidate; + bool *visited_vertices; + + idx = new ui[max_depth]; + idx_count = new ui[max_depth]; + embedding = new ui[max_depth]; + visited_vertices = new bool[data_graph->getVerticesCount()]; + std::fill(visited_vertices, visited_vertices + data_graph->getVerticesCount(), false); + valid_candidate = new ui *[max_depth]; + + for (int i = 0; i < max_depth; ++i) { + V_ID cur_vertex = order[i]; + ui max_candidate_count = candidates_count[cur_vertex]; + valid_candidate[i] = new V_ID[max_candidate_count]; + } + + idx[cur_depth] = 0; + idx_count[cur_depth] = candidates_count[start_vertex]; + std::copy(candidates[start_vertex], + candidates[start_vertex] + candidates_count[start_vertex], + valid_candidate[cur_depth]); + + while (true) { + while (idx[cur_depth] < idx_count[cur_depth]) { + V_ID u = order[cur_depth]; + V_ID v = valid_candidate[cur_depth][idx[cur_depth]]; + embedding[u] = v; + visited_vertices[v] = true; + idx[cur_depth] += 1; + + if (cur_depth == max_depth - 1) { + embedding_cnt += 1; + visited_vertices[v] = false; + if (embedding_cnt >= output_limit_num) { + goto EXIT; + } + } else { + call_count += 1; + cur_depth += 1; + idx[cur_depth] = 0; + generateValidCandidates(data_graph, + cur_depth, + embedding, + idx_count, + valid_candidate, + visited_vertices, + bn, + bn_count, + order, + candidates, + candidates_count); + } + } + + cur_depth -= 1; + if (cur_depth < 0) + break; + else + visited_vertices[embedding[order[cur_depth]]] = false; + } + +// Release the buffer. +EXIT: + delete[] bn_count; + delete[] idx; + delete[] idx_count; + delete[] embedding; + delete[] visited_vertices; + for (int i = 0; i < max_depth; ++i) { + delete[] bn[i]; + delete[] valid_candidate[i]; + } + delete[] bn; + delete[] valid_candidate; + return embedding_cnt; +} + +void bfs(Graph *graph, + V_ID root_v, + TreeNode *&tree, + V_ID *&order, + __attribute__((unused)) V_ID *&provenance) { + // How many vertices. 
+ ui count = graph->getVerticesCount(); + std::queue queue; + std::vector visited(count, false); + // Initialize the tree + tree = new TreeNode[count]; + for (ui i = 0; i < count; ++i) { + tree[i].initialize(count); + } + // This is + order = new V_ID[count]; + ui visited_count = 0; // how many nodes have been visited. + queue.push(root_v); // push the first root node into the BFS queue. + visited[root_v] = true; + tree[root_v].level = 0; + tree[root_v].id = root_v; // node id. + while (!queue.empty()) { + V_ID u = queue.front(); + queue.pop(); + order[visited_count++] = u; // records this as the next order. + ui u_nbrs_count; + // How many neighbors of the node + // get the neighbors informations + V_ID *u_nbrs = graph->getVertexNeighbors(u, u_nbrs_count); + for (ui i = 0; i < u_nbrs_count; ++i) { + V_ID u_nbr = u_nbrs[i]; + // for each unvisited neighbors, if unvisited: + // (1) push in th query + // (2) build it in the tree + // (3) record it's parent-id + // (4) the level should add 1 + // (5) the parent node will add this node as child. + if (!visited[u_nbr]) { + queue.push(u_nbr); + visited[u_nbr] = true; + tree[u_nbr].id = u_nbr; + tree[u_nbr].parent = u; + tree[u_nbr].level = tree[u].level + 1; + tree[u].children[tree[u].children_count++] = u_nbr; + } + } + } +} + +void Insertion(V_ID u, + V_ID &tmp_first, + V_ID &tmp_second, + std::unordered_map>> &intersetion) { + /*if (tmp_first > tmp_second) { + V_ID tmp = tmp_first; + tmp_first = tmp_second; + tmp_second = tmp; + }*/ + std::pair p1(tmp_first, tmp_second); + // change to the vector next time. 
+ if (intersetion.find(u) == intersetion.end()) { + std::vector> q; + q.push_back(p1); + intersetion.insert(std::pair>>(u, q)); + } else { + intersetion[u].push_back(p1); + } +} + +V_ID ParentNode(V_ID first, V_ID second, TreeNode *&tree) { + std::vector frontier; + frontier.push_back(first); + // std::cout <<"(" << first << ","<< second << ")"<< std::endl; + while (tree[first].parent != 99999) { + first = tree[first].parent; + frontier.push_back(first); + } + // std::cout <<"2: and size: " << frontier.size() << " " << second << std::endl; + // std::cout <second-> " << std::endl; + return second; + } + while (tree[second].parent != 99999) { + second = tree[second].parent; + // std::cout <<"parent-> " << second << std::endl; + if (std::find(frontier.begin(), frontier.end(), second) != frontier.end()) { + // std::cout <<"Found->second->parent-> " << std::endl; + return second; + } + } + // std::cout <<"End of ParentNode" << std::endl; + return 99999; +} + +void add_provenance(std::unordered_map> &query_provenance, + V_ID &query_node, + V_ID &provenance_node) { + if (query_provenance.find(query_node) != query_provenance.end()) { + // insert new value and key in the query_provenance. 
+ std::vector::iterator iter; + iter = find( + query_provenance[query_node].begin(), query_provenance[query_node].end(), provenance_node); + if (iter == query_provenance[query_node].end()) { + query_provenance[query_node].push_back(provenance_node); + } + } else { + std::unordered_map>::iterator iter; + iter = query_provenance.begin(); + std::vector node_pro; + node_pro.push_back(provenance_node); + query_provenance.insert(iter, std::pair>(query_node, node_pro)); + } +} + +int64_t enumeration_ress_bk( + bool *visited, + int64_t &res_all, + std::vector>> &P_Candidates, + std::vector>> &NTE_Candidates, + V_ID *order, + TreeNode *tree, + V_ID *res, + ui current_order, + ui &query_count, + Graph *query_graph, + Graph *data_graph, + std::vector &order_index, + V_ID *connection, + V_ID *offset, + ui **candidates_2, + ui **parent_offset, + ui *candidates_l, + ui *parent_l) { + V_ID u = order[current_order]; + V_ID v_f = res[order_index[tree[u].parent]]; + std::vector local_c = P_Candidates[u][v_f]; + int64_t total = 0; + bool over_all; + if (current_order == query_count) { + for (unsigned int j : local_c) { + if (visited[j]) { + continue; + } + over_all = true; + res_all += 1; + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(j, data_nbrs_count_1); + // std::unordered_map>* v_nl = data_graph->getVertexNL(local_c[j]); + for (ui k = offset[current_order - 1]; k < offset[current_order]; k++) { + // L_ID l = query_graph->getVertexLabel(order[connection[k]]); + if (std::find(data_nbrs_1, data_nbrs_1 + data_nbrs_count_1, res[connection[k]]) == + data_nbrs_1 + data_nbrs_count_1) { + over_all = false; + break; + } + } + if (over_all == true) { + total += 1; + } + } + + return total; + } else { + for (unsigned int j : local_c) { + // V_ID v = local_c[j]; + if (visited[j]) { + continue; + } + over_all = true; + res_all += 1; + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(j, data_nbrs_count_1); + + for (ui k = 
offset[current_order - 1]; k < offset[current_order]; k++) { + if (std::find(data_nbrs_1, data_nbrs_1 + data_nbrs_count_1, res[connection[k]]) == + data_nbrs_1 + data_nbrs_count_1) { + over_all = false; + break; + } + } + if (over_all == true) { + visited[j] = true; + res[current_order] = j; + total += enumeration_ress_bk(visited, + res_all, + P_Candidates, + NTE_Candidates, + order, + tree, + res, + current_order + 1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + candidates_2, + parent_offset, + candidates_l, + parent_l); + visited[j] = false; + } + } + return total; + } +} + +void visit_pre_3(__attribute__((unused)) bool *visited, + ui **candidates_2, + int64_t **parent_offset, + __attribute__((unused)) int64_t *candidates_l, + V_ID ¤t_order, + __attribute__((unused)) V_ID &p_order, + int64_t &k, + V_ID *res, + __attribute__((unused)) bool &run) { + int64_t j = k; + res[current_order] = candidates_2[current_order][j]; + + visited[candidates_2[current_order][j]] = true; + + for (int i = current_order; i > 0; i--) { + j = parent_offset[i][j]; + visited[candidates_2[i - 1][j]] = true; + res[i - 1] = candidates_2[i - 1][j]; + } +} + +void visit_pre(__attribute__((unused)) bool *visited, + ui **candidates_2, + int64_t **parent_offset, + __attribute__((unused)) int64_t *candidates_l, + V_ID ¤t_order, + __attribute__((unused)) V_ID &p_order, + int64_t &k, + V_ID *res) { + int64_t j = k; + visited[candidates_2[current_order - 1][j]] = true; + res[current_order - 1] = candidates_2[current_order - 1][j]; + + for (int i = current_order - 1; i > 0; i--) { + j = parent_offset[i][j]; + visited[candidates_2[i - 1][j]] = true; + res[i - 1] = candidates_2[i - 1][j]; + } +} + +void visit_pre_2(__attribute__((unused)) bool *visited, + ui **candidates_2, + int64_t **parent_offset, + __attribute__((unused)) int64_t *candidates_l, + V_ID ¤t_order, + __attribute__((unused)) V_ID &p_order, + int64_t &k, + V_ID *res) { + int64_t j = k; + 
res[current_order - 1] = candidates_2[current_order - 1][j]; + + for (int i = current_order - 1; i > 0; i--) { + j = parent_offset[i][j]; + res[i - 1] = candidates_2[i - 1][j]; + } +} + +void visit_can(bool *visited, + ui **candidates_2, + int64_t **parent_offset, + __attribute__((unused)) int64_t *candidates_l, + V_ID ¤t_order, + __attribute__((unused)) V_ID &p_order, + int64_t &k) { + int64_t j = k; + visited[candidates_2[current_order - 1][j]] = false; + + for (int i = current_order - 1; i > 0; i--) { + j = parent_offset[i][j]; + visited[candidates_2[i - 1][j]] = false; + } +} + +ui &find_v_f(__attribute__((unused)) ui **candidates_2, + __attribute__((unused)) int64_t **parent_offset, + __attribute__((unused)) int64_t *candidates_l, + V_ID ¤t_order, + __attribute__((unused)) V_ID &p_order, + int64_t &k) { + if (current_order - 1 == p_order) { + return candidates_2[p_order][k]; + } else { + int64_t j = k; + for (ui i = current_order - 1; i > p_order; i--) { + j = parent_offset[i][j]; + } + return candidates_2[p_order][j]; + } +} +void find_children_range(int64_t **children_1, + int64_t *candidates_l, + V_ID &ord_count, + V_ID ¤t_count, + int64_t &start, + int64_t &end, + int64_t &p) { + // start and end. 
+ // std::cout <<"###sss" << std::endl; + if (current_count + 1 == ord_count) { + start = p + 0; + end = p + 1; + } else { + start = children_1[current_count][p]; + if (p >= candidates_l[current_count]) { + end = start; + return; + } + end = children_1[current_count][p + 1]; + for (ui i = current_count + 1; i < ord_count - 1; i++) { + start = children_1[i][start]; + if (end < candidates_l[i]) { + end = children_1[i][end]; + } else { + end = children_1[i][candidates_l[i]]; + } + } + } +} +int64_t morphism_next(bool *visited_j, + bool *&visited, + int64_t **children_offset_2, + int64_t *candidates_l_2, + ui q_add, + int64_t &j, + ui order_count_2, + std::vector> &i_e, + ui t, + int64_t *start_2, + int64_t *end_2, + ui **res_2) { + t += 1; + int64_t total = 0; + ui order_s = q_add - 1; + if (t < i_e.size()) { + ui q = i_e[t].second; + q_add = q + 1; + find_children_range(children_offset_2, candidates_l_2, q_add, order_s, start_2[t], end_2[t], j); + for (int64_t jj = start_2[t]; jj < end_2[t]; jj++) { + if (visited[res_2[q][jj]] == false) { + if (q_add == order_count_2) { + // std::cout <<"333" << std::endl; + total += 1; + } else { + total += morphism_next(visited_j, + visited, + children_offset_2, + candidates_l_2, + q_add, + jj, + order_count_2, + i_e, + t, + start_2, + end_2, + res_2); + // if (total != 0) { + // std::cout <> &i_e, + std::vector q_all, + ui t, + int64_t *start_2, + int64_t *end_2, + ui **res_2, + ui q_1, + int64_t &call_count_2) { + t += 1; + bool all_exist = false; + int64_t total_2 = 0; + ui order_s = q_add - 1; + if (t < q_all.size()) { + ui q = q_all[t]; + q_add = q + 1; + int64_t tmp = data_count * q; + // ui* & visit_local_5 = visit_local_4[tmp]; + // std::cout < 0) { + for (int64_t jj = start_2[t]; jj < end_2[t]; jj++) { + V_ID *res_t_2 = res_2_r[jj]; + call_count_2++; + all_exist = true; + for (auto &t_2 : i_e) { + ui q_3 = t_2.second; + if (visited[res_t_2[q_3]]) { + all_exist = false; + break; + } + } + total_2 += all_exist; + } + } else 
{ + call_count_2 += 1; + total_2 = end_2[t] - start_2[t]; + } + } + return total_2; + } +} + +void reverse_cuts(__attribute__((unused)) ui **can_1, + __attribute__((unused)) int64_t **parent_1, + __attribute__((unused)) int64_t **children_1, + __attribute__((unused)) int64_t *candidates_l, + __attribute__((unused)) V_ID &ord_count, + __attribute__((unused)) ui **can_1_n, + __attribute__((unused)) int64_t **parent_1_n, + __attribute__((unused)) int64_t *candidates_l_n, + __attribute__((unused)) bool **valid) { + __attribute__((unused)) int64_t p, pp; + int64_t j, j_old, k, l; + + l = candidates_l[ord_count - 1]; + + for (int64_t d = 0; d < l; d++) { + can_1_n[ord_count - 1][d] = can_1[ord_count - 1][d]; + } + + candidates_l[ord_count - 1] = l; + + std::cout << "Finish Reverse" << std::endl; + for (ui i = ord_count - 1; i > 0; i--) { + std::cout << i << std::endl; + l = candidates_l[i]; + p = parent_1[i][0]; + for (int64_t q = 0; q <= p; q++) { + children_1[i - 1][q] = 0; + } + k = p + 1; + j_old = 0; + // std::cout <<"l->" << l << std::endl; + for (j = 1; j < l; j++) { + if (parent_1[i][j] != p) { + j_old = j; + p = parent_1[i][j]; + for (int64_t q = k; q <= p; q++) { + children_1[i - 1][q] = j_old; + } + k = p + 1; + } + } + p = parent_1[i][l - 1]; + // k = p + 1; + for (int64_t q = k; q <= candidates_l[i - 1] + 1; q++) { + children_1[i - 1][q] = l; + } + } +} + +int64_t find_key(__attribute__((unused)) ui **can_1, + int64_t **parent_1, + V_ID ord_1, + V_ID &ord_count_1, + int64_t p) { + for (ui i = ord_count_1 - 1; i > ord_1; i--) { + p = parent_1[i][p]; + } + return p; +} + +void find_res(ui **can_1, int64_t **parent_1, V_ID &ord_count_1, int64_t p, V_ID *res) { + for (ui i = ord_count_1 - 1; i > 0; i--) { + res[i] = can_1[i][p]; + p = parent_1[i][p]; + } + res[0] = can_1[0][p]; +} + +V_ID &find_value(ui **can_1, int64_t **parent_1, V_ID &ord_1, V_ID &ord_count_1, int64_t p) { + for (ui i = ord_count_1 - 1; i > ord_1; i--) { + p = parent_1[i][p]; + } + return 
can_1[ord_1][p]; +} + +bool check_morphism(ui **can_1, + ui **can_2, + int64_t **parent_1, + int64_t **parent_2, + V_ID &ord_1, + V_ID &ord_2, + V_ID &ord_count_1, + V_ID &ord_count_2, + int64_t p, + int64_t q) { + if (find_value(can_1, parent_1, ord_1, ord_count_1, p) == + find_value(can_2, parent_2, ord_2, ord_count_2, q)) { + return true; + } + return false; +} + +void enumeration_bfs2(bool *visited, + int64_t &res_all_1, + ui **candidates, + ui *candidates_count, + V_ID *order, + TreeNode *tree, + V_ID *res, + ui current_order, + ui &query_count, + Graph *query_graph, + Graph *data_graph, + std::vector &order_index, + V_ID *connection, + V_ID *offset, + ui **candidates_2, + int64_t **parent_offset, + int64_t *candidates_l, + V_ID *visit_local_2, + ui &c_length, + bool *morphism) { + // Current Order = current_order + // Get the query node of current order + V_ID &u = order[current_order]; + // ui p_node = tree[u].parent; + if (current_order == 1) { + candidates_l[0] = static_cast(candidates_count[order[0]]); + for (int64_t i = 0; i < candidates_l[0]; i++) { + candidates_2[0][i] = candidates[order[0]][i]; + } + } + ui p_order = 0; + int64_t total = 0; + + for (ui ii = 0; ii < candidates_count[u]; ii++) { + visit_local_2[candidates[u][ii]] = 0; + } + + if (true) { + int64_t c_l = candidates_l[current_order - 1]; + ui i, j; + ui *&candidate_3 = candidates_2[current_order]; + int64_t *&parent_3 = parent_offset[current_order]; + L_ID l = query_graph->getVertexLabel(u); + V_ID d = offset[current_order] - offset[current_order - 1]; + + ui candidates_tmp_count = candidates_count[u]; + ui *candidates_tmp = new ui[1000000]; + + if (current_order == query_count - 1) { + if (morphism[current_order] == true) { + for (int64_t k = 0; k < c_l; k++) { + visit_pre( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + // for (i = 0; i < candidates_count[u]; i++) { + // visit_local_2[candidates[u][i]] = 0; + // } + + candidates_tmp_count = 0; + 
for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + if (visit_local_2[tmp[j]] == 1) { + candidates_tmp[candidates_tmp_count++] = tmp[j]; + } + } + } + for (j = 0; j < candidates_tmp_count; j++) { + res_all_1 += 1; + if (visit_local_2[candidates_tmp[j]] == d && visited[candidates_tmp[j]] == false) { + candidate_3[total] = candidates_tmp[j]; + parent_3[total] = k; + total += 1; + } + } + + for (i = 0; i < candidates_tmp_count; i++) { + visit_local_2[candidates_tmp[i]] = 0; + } + + visit_can(visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k); + } + } else { + for (int64_t k = 0; k < c_l; k++) { + visit_pre_2( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + candidates_tmp_count = 0; + // for (i = 0; i < candidates_count[u]; i++) { + // visit_local_2[candidates[u][i]] = 0; + // } + + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + if (visit_local_2[tmp[j]] == 1) { + candidates_tmp[candidates_tmp_count++] = tmp[j]; + } + } + } + for (j = 0; j < candidates_tmp_count; j++) { + res_all_1 += 1; + if (visit_local_2[candidates_tmp[j]] == d) { + candidate_3[total] = candidates_tmp[j]; + parent_3[total] = k; + total += 1; + } + } + + for (i = 0; i < candidates_tmp_count; i++) { + visit_local_2[candidates_tmp[i]] = 0; + } + } + } + } else { + if (morphism[current_order] == true) { + for (int64_t k = 0; k < c_l; k++) { + visit_pre( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + + // candidates_tmp_count = 0; + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + 
std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + for (j = 0; j < candidates_count[u]; j++) { + res_all_1 += 1; + if (visit_local_2[candidates[u][j]] == d && visited[candidates[u][j]] == false) { + candidate_3[total] = candidates[u][j]; + parent_3[total] = k; + total += 1; + } + } + + for (i = 0; i < candidates_count[u]; i++) { + visit_local_2[candidates[u][i]] = 0; + } + + visit_can(visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k); + } + } else { + for (int64_t k = 0; k < c_l; k++) { + visit_pre_2( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + // ui t = (current_order-1)*c_length; + + // candidates_tmp_count = 0; + + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + for (j = 0; j < candidates_count[u]; j++) { + res_all_1 += 1; + if (visit_local_2[candidates[u][j]] == d) { + candidate_3[total] = candidates[u][j]; + parent_3[total] = k; + total += 1; + } + } + + for (i = 0; i < candidates_count[u]; i++) { + visit_local_2[candidates[u][i]] = 0; + } + } + } + } + // std::cout <<"total:" << total << std::endl; + } + candidates_l[current_order] = total; + if (current_order != query_count - 1) { + enumeration_bfs2(visited, + res_all_1, + candidates, + candidates_count, + order, + tree, + res, + current_order + 1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + candidates_2, + parent_offset, + candidates_l, + visit_local_2, + c_length, + morphism); + } + std::cout << "End " << total << std::endl; +} + +void enumeration_bfs(bool *visited, + int64_t &res_all, + std::vector>> &P_Candidates, + V_ID *order, + TreeNode *tree, + 
V_ID *res, + ui current_order, + ui &query_count, + Graph *query_graph, + Graph *data_graph, + std::vector &order_index, + V_ID *connection, + V_ID *offset, + ui **candidates_2, + int64_t **parent_offset, + int64_t *candidates_l, + V_ID *visit_local_2, + ui &c_length, + bool *morphism) { + // Current Order = current_order + // Get the query node of current order + V_ID &u = order[current_order]; + V_ID &p_order = order_index[tree[u].parent]; + // Next is to build the current order index. + int64_t c_l = candidates_l[current_order - 1]; + // std::cout <<"Current #order->" << current_order << "," << c_l << "," << " P_order->" << p_order + // << + // ", u:" << u << std::endl; + int64_t total = 0; + ui i, j; + ui *&candidate_3 = candidates_2[current_order]; + int64_t *&parent_3 = parent_offset[current_order]; + + L_ID l = query_graph->getVertexLabel(u); + V_ID d = offset[current_order] - offset[current_order - 1]; + + if (current_order == query_count - 1) { + if (morphism[current_order] == true) { + for (int64_t k = 0; k < c_l; k++) { + visit_pre( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + V_ID &v_f = res[p_order]; + + std::vector &local_c = P_Candidates[u][v_f]; + + for (i = 0; i < local_c.size(); i++) { + visit_local_2[local_c[i]] = 0; + } + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + for (j = 0; j < local_c.size(); j++) { + if (visit_local_2[local_c[j]] == d && visited[local_c[j]] == false) { + candidate_3[total] = local_c[j]; + parent_3[total] = k; + total += 1; + } + } + visit_can(visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k); + } + } else { + // std::cout <<"Here" << std::endl; + for (int64_t k = 0; k < c_l; k++) { + visit_pre_2( + visited, candidates_2, parent_offset, 
candidates_l, current_order, p_order, k, res); + V_ID &v_f = res[p_order]; + std::vector &local_c = P_Candidates[u][v_f]; + + for (i = 0; i < local_c.size(); i++) { + visit_local_2[local_c[i]] = 0; + } + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + for (j = 0; j < local_c.size(); j++) { + // std::cout <<"Local->" << visit_local_2[local_c[j]] << ",d->" << d << std::endl; + if (visit_local_2[local_c[j]] == d) { + candidate_3[total] = local_c[j]; + parent_3[total] = k; + total += 1; + } + } + } + } + } else { + if (morphism[current_order] == true) { + for (int64_t k = 0; k < c_l; k++) { + visit_pre( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + V_ID &v_f = res[p_order]; + std::vector &local_c = P_Candidates[u][v_f]; + // ui t = (current_order-1)*c_length; + for (i = 0; i < local_c.size(); i++) { + visit_local_2[local_c[i]] = 0; + } + + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + // abc + for (j = 0; j < local_c.size(); j++) { + if (visited[local_c[j]] == false && visit_local_2[local_c[j]] == d) { + candidate_3[total] = local_c[j]; + parent_3[total] = k; + total += 1; + } + } + + visit_can(visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k); + } + } else { + for (int64_t k = 0; k < c_l; k++) { + visit_pre_2( + visited, candidates_2, parent_offset, candidates_l, current_order, p_order, k, res); + V_ID &v_f = res[p_order]; + std::vector &local_c = P_Candidates[u][v_f]; + + // ui t = (current_order-1)*c_length; + + for (i = 0; i < local_c.size(); i++) { + visit_local_2[local_c[i]] = 
0; + } + + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = + data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + for (j = 0; j < local_c.size(); j++) { + if (visit_local_2[local_c[j]] == d) { + candidate_3[total] = local_c[j]; + parent_3[total] = k; + total += 1; + } + } + } + } + } + + candidates_l[current_order] = total; + if (current_order != query_count - 1) { + enumeration_bfs(visited, + res_all, + P_Candidates, + order, + tree, + res, + current_order + 1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + candidates_2, + parent_offset, + candidates_l, + visit_local_2, + c_length, + morphism); + } + + // std::cout <<"End " << total << std::endl; +} + +int64_t enumeration_ress_bk_2( + bool *visited, + int64_t &res_all, + std::vector>> &P_Candidates, + V_ID *order, + TreeNode *tree, + V_ID *res, + ui current_order, + ui &query_count, + Graph *query_graph, + Graph *data_graph, + std::vector &order_index, + V_ID *connection, + V_ID *offset, + V_ID *visit_local, + V_ID *visit_local_2, + ui &c_length, + bool *morphism) { + V_ID &u = order[current_order]; + V_ID &v_f = res[order_index[tree[u].parent]]; + std::vector &local_c = P_Candidates[u][v_f]; + V_ID d = offset[current_order] - offset[current_order - 1]; + int64_t total = 0; + ui i, j; + + if (current_order == query_count) { + for (i = 0; i < local_c.size(); i++) { + visit_local_2[local_c[i]] = 0; + } + L_ID l = query_graph->getVertexLabel(u); + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_2[tmp[j]] += 1; + } + } + + if (morphism[current_order] == true) { + for (j = 0; j < local_c.size(); j++) { + if (visit_local_2[local_c[j]] == d) { + total += 
1 - visited[local_c[j]]; + } + } + } else { + for (j = 0; j < local_c.size(); j++) { + if (visit_local_2[local_c[j]] == d) { + total += 1; + } + } + } + return total; + } else { + ui t = (current_order - 1) * c_length; + V_ID *visit_local_3 = visit_local + t; + for (i = 0; i < local_c.size(); i++) { + visit_local_3[local_c[i]] = 0; + } + L_ID l = query_graph->getVertexLabel(u); + for (i = offset[current_order - 1]; i < offset[current_order]; i++) { + std::unordered_map> *v_nl = data_graph->getVertexNL(res[connection[i]]); + std::vector &tmp = (*v_nl)[l]; + for (j = 0; j < tmp.size(); j++) { + visit_local_3[tmp[j]] += 1; + } + } + if (morphism[current_order] == true) { + for (j = 0; j < local_c.size(); j++) { + if (visited[local_c[j]] == false && visit_local_3[local_c[j]] == d) { + res[current_order] = local_c[j]; + visited[local_c[j]] = true; + total += enumeration_ress_bk_2(visited, + res_all, + P_Candidates, + order, + tree, + res, + current_order + 1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + visit_local, + visit_local_2, + c_length, + morphism); + visited[local_c[j]] = false; + } + } + } else { + for (j = 0; j < local_c.size(); j++) { + if (visit_local_3[local_c[j]] == d) { + res[current_order] = local_c[j]; + total += enumeration_ress_bk_2(visited, + res_all, + P_Candidates, + order, + tree, + res, + current_order + 1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + visit_local, + visit_local_2, + c_length, + morphism); + } + } + } + return total; + } +} + +int64_t enumeration_res(int &res_all, + std::vector>> &P_Candidates, + V_ID v_f, + V_ID *&order, + TreeNode *&tree, + V_ID *res, + ui current_order, + int64_t total, + int size_all, + int query_count, + Graph *&query_graph, + Graph *&data_graph, + std::vector &order_index) { + V_ID u = order[current_order]; + // std::cout <<"####In enumeration function" << std::endl; + bool over_all = true; + int64_t res_p = 0; + v_f = res[total 
* size_all + order_index[tree[u].parent]]; + int p = P_Candidates[u][v_f].size(); + // std::cout <<"p:->" << p << ", u:"<< u << ", order:"<< current_order << ",v_f" << v_f << ", + // Order + // index of parent" << order_index[tree[u].parent] << ", tree.parent->" << tree[u].parent << + // std::endl; + + if (static_cast(current_order) == query_count - 1) { + // std::cout <<"** in if" << std::endl; + for (int j = 0; j < p; j++) { + over_all = true; + ui nbrs_count_1; + V_ID *nbrs_1 = query_graph->getVertexNeighbors(order[current_order], nbrs_count_1); + for (ui k = 0; k < current_order; k++) { + if (res[total * size_all + k] == P_Candidates[u][v_f][j]) { + over_all = false; + break; + } + } + res_all += 1; + + if (over_all == true) { + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = + data_graph->getVertexNeighbors(P_Candidates[u][v_f][j], data_nbrs_count_1); + for (ui k = 0; k < nbrs_count_1; k++) { + if (order_index[nbrs_1[k]] < current_order && + (nbrs_1[k] == order[0] || order_index[nbrs_1[k]] != 0)) { + over_all = false; + V_ID tmp = res[total * size_all + order_index[nbrs_1[k]]]; + for (int64_t t = 0; t < data_nbrs_count_1; t++) { + if (data_nbrs_1[t] == tmp) { + over_all = true; + // std::cout <<"Last True here:::" << std::endl; + // break; + } + } + if (over_all == false) { + break; + } + } + } + } + + if (over_all == true) { + // std::cout <<"Res_p:" << res_p << "," << P_Candidates[u][v_f][j] << std::endl; + res[total * size_all + (res_p)*size_all + current_order] = P_Candidates[u][v_f][j]; + for (ui t = 0; t < current_order; t++) { + res[total * size_all + res_p * size_all + t] = res[total * size_all + t]; + } + res_p += 1; + } + } + + return res_p; + } else { + int64_t total_2 = 0; + for (uint64_t j = 0; j < P_Candidates[u][v_f].size(); j++) { + over_all = true; + int64_t single_res = 0; + // std::cout <" << P_Candidates[u][v_f][j] << std::endl; + ui nbrs_count_1; + V_ID *nbrs_1 = query_graph->getVertexNeighbors(order[current_order], nbrs_count_1); + // 
std::cout <<"Query count:" << nbrs_count_1 << std::endl; + for (ui k = 0; k < current_order; k++) { + if (res[total * size_all + k] == P_Candidates[u][v_f][j]) { + over_all = false; + // std::cout <<"current_node" << P_Candidates[u][v_f][j] << ", expected node:" << + // res[total*size_all + k] << ", order ->" << k << std::endl;std::cout <<"<<---->> Erase + // <<---->>" + // << std::endl; + break; + } + } + if (over_all == true) { + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = + data_graph->getVertexNeighbors(P_Candidates[u][v_f][j], data_nbrs_count_1); + for (ui k = 0; k < nbrs_count_1; k++) { + if (order_index[nbrs_1[k]] < current_order && + (nbrs_1[k] == order[0] || order_index[nbrs_1[k]] != 0)) { + over_all = false; + V_ID tmp_v = res[total * size_all + order_index[nbrs_1[k]]]; + + // std::cout <<"NB ID:" << nbrs_1[k] << ",Data Count:" << data_nbrs_count_1 << ", V: " + // << + // tmp_v << "," << nbrs_1[k] << "," << order_index[nbrs_1[k]] << std::endl; + for (int64_t t = 0; t < data_nbrs_count_1; t++) { + if (data_nbrs_1[t] == tmp_v) { + over_all = true; + // std::cout <<"True here" << std::endl; + // break; + } + } + + if (over_all == false) { + break; + } + } + } + } + res_all += 1; + if (over_all == true) { + // std::cout <" << P_Candidates[u][v_f][j] << ", + // &&¤t + // order***: " << current_order << std::endl;std::cout <<"Over ALL Game Over" << std::endl; + // res_p += 1; + res[total * size_all + total_2 * size_all + current_order] = P_Candidates[u][v_f][j]; + for (ui t = 0; t < current_order; t++) { + res[total * size_all + total_2 * size_all + t] = res[total * size_all + t]; + } + // std::cout <<")))In function --->" << ", Order and p->vertex->" << P_Candidates[u][v_f][j] + // << + // endl; + single_res = enumeration_res(res_all, + P_Candidates, + P_Candidates[u][v_f][j], + order, + tree, + res, + current_order + 1, + total + total_2, + size_all, + query_count, + query_graph, + data_graph, + order_index); + // std::cout <<"--->Finish single_res" << 
single_res << std::endl; + // std::cout <<"next Single res:" << single_res << std::endl; + // This is the one + for (int64_t k = 0; k < single_res; k++) { + // std::cout <<"all" << std::endl; + res[total * size_all + total_2 * size_all + k * size_all + current_order] = + P_Candidates[u][v_f][j]; + // break; + // std::cout <<"Current order and data vertex: " << current_order << ", " << + // P_Candidates[u][v_f][j] << std::endl; + } + + total_2 += single_res; + } + } + // std::cout <<"Total_2->" << total_2 << std::endl; + return total_2; + } +} + +// Assume we have all the information in the graph. +void local_optimization( + std::unordered_map> &res, + ui query_count, + __attribute__((unused)) std::vector order_index, + __attribute__((unused)) + std::unordered_map>> &intersetion, + ui *&order, + __attribute__((unused)) TreeNode *&tree, + __attribute__((unused)) std::vector>> &P_Candidates, + __attribute__((unused)) + std::unordered_map>>> + data_provenance, + __attribute__((unused)) Graph *data_graph, + Graph *query_graph) { + for (ui i = 0; i < query_count; i++) { + ui u = order[i]; + std::vector local_res; + ui label = query_graph->getVertexLabel(u); + local_res.push_back(label); + for (ui j = i + 1; j < query_count; j++) { + ui tmp = order[j]; + if (res.find(label) == res.end() && query_graph->getVertexLabel(tmp) == label) { + local_res.push_back(tmp); + } + } + if (local_res.size() > 1) { + auto tmp = res.emplace(u, std::vector()); + tmp.first->second = local_res; + } + } +} + +void Refinement( + std::unordered_map>> &intersetion, + ui *&order, + TreeNode *&tree, + ui u, + std::vector>> &P_Candidates, + std::unordered_map>>> + data_provenance, + Graph *data_graph) { + // TreeNode& u_node = tree[u]; + ui u_f = INVALID_VERTEX_ID; + std::unordered_map> frontiers_2; + if (u != order[0]) { + u_f = tree[u].parent; + frontiers_2 = P_Candidates[u_f]; + } + std::unordered_map> frontiers_1 = P_Candidates[u]; + __attribute__((unused)) bool do_interset; + do_interset = 
false; + bool update_index = false; + if (u != order[0] && intersetion.find(u) != intersetion.end()) { + do_interset = true; + for (ui j = 0; j < tree[u].bn_count; j++) { + update_index = false; + + if (tree[u].bn[j] != tree[u].parent) { + V_ID u_nb = tree[u].bn[j]; + std::unordered_map> frontiers_3 = P_Candidates[u_nb]; + V_ID u_nb_f = tree[u_nb].parent; + V_ID pro_node = ParentNode(u, u_nb, tree); + + for (const auto &it_3 : frontiers_3) { + ui v_3_f = it_3.first; + bool find_n = false; + std::vector c_2 = it_3.second; + std::vector find_intersect; + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + find_intersect.push_back(false); + } + + for (const auto &it_1 : frontiers_1) { + ui v_1_f = it_1.first; + bool share_pro = false; + + for (ui k_1 = 0; k_1 < data_provenance[u_f][v_1_f][pro_node].size(); k_1++) { + for (ui k_2 = 0; k_2 < data_provenance[u_nb_f][v_3_f][pro_node].size(); k_2++) { + if (data_provenance[u_f][v_1_f][pro_node][k_1] == + data_provenance[u_nb_f][v_3_f][pro_node][k_2]) { + share_pro = true; + break; + } + } + } + + if (share_pro == true) { + // int count = 0; + std::vector c_1 = it_1.second; + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + ui v_2 = c_2[l_2]; + if (find_intersect[l_2] == true) continue; + for (unsigned int v_1 : c_1) { + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(v_1, data_nbrs_count_1); + if (DEBUG) { + // std::cout <<"Neighbors are: " ; + } + for (ui p = 0; p < data_nbrs_count_1; p++) { + if (data_nbrs_1[p] == v_2) { + find_intersect[l_2] = true; + find_n = true; + break; + } + } + if (find_intersect[l_2] == true) { + break; + } + } + } + } + } + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + ui v_2 = c_2[l_2]; + if (find_intersect[l_2] == false) { + for (uint64_t ll = 0; ll < P_Candidates[u_nb][v_3_f].size(); ll++) { + if (P_Candidates[u_nb][v_3_f][ll] == v_2) { + if (DEBUG) { + std::cout << "Erase intersect, v_3_f: " << v_3_f << std::endl; + } + 
P_Candidates[u_nb][v_3_f].erase(P_Candidates[u_nb][v_3_f].begin() + ll); + update_index = true; + break; + } + } + } + } + if (DEBUG) { + std::cout << "Judge" << std::endl; + } + if (find_n == false) { + if (DEBUG) { + std::cout << "Erase Candidates" << std::endl; + } + P_Candidates[u_nb].erase(v_3_f); + update_index = true; + if (DEBUG) { + std::cout << "Erase C-Done"; + } + } + } + if (update_index == true) { + Refinement(intersetion, order, tree, u_nb, P_Candidates, data_provenance, data_graph); + } + } + } + } + if (DEBUG) { + std::cout << "Refine the parents" << std::endl; + } + // BFS reduce it's parent: + + update_index = false; + if (order[0] != u) { + frontiers_2 = P_Candidates[u_f]; + for (const auto &it : frontiers_2) { + V_ID v_f_f = it.first; + int count = 0; + if (P_Candidates[u_f].find(v_f_f) != P_Candidates[u_f].end()) { + for (uint64_t j = 0; j < it.second.size(); j++) { + V_ID v_f = it.second[j]; + if (frontiers_1.find(v_f) == frontiers_1.end()) { + P_Candidates[u_f][v_f_f].erase(P_Candidates[u_f][v_f_f].begin() + j - count); + count += 1; + } + // This is all + } + if (P_Candidates[u_f][v_f_f].size() == 0) { + P_Candidates[u_f].erase(v_f_f); + update_index = true; + } + } + } + } + + if (DEBUG) { + std::cout << "Refine the parents End" << std::endl; + } + if (update_index == true) { + Refinement(intersetion, order, tree, u_f, P_Candidates, data_provenance, data_graph); + } + + // std::cout <<"3" << std::endl; + + update_index = false; + frontiers_1 = P_Candidates[u]; + do_interset = false; + // Intersection erase their frontier neighbourhoods: + if (true) { + if (DEBUG) { + std::cout << " In the key of Intersetion." 
<< std::endl; + } + do_interset = true; + if (u == order[0]) { + // return; + u_f = u; + } + for (ui j = 0; j < tree[u].fn_count; j++) { + update_index = false; + V_ID u_nb = tree[u].fn[j]; + + if (u_nb == u_f || + std::find(tree[u].children, tree[u].children + tree[u].children_count, u_nb) != + tree[u].children + tree[u].children_count) + continue; + if (DEBUG) { + std::cout << "Nbors N:" << u_nb << std::endl; + } + std::unordered_map> frontiers_3 = P_Candidates[u_nb]; + V_ID u_nb_f = tree[u_nb].parent; + V_ID pro_node = ParentNode(u, u_nb, tree); + + for (const auto &it_3 : frontiers_3) { + ui v_3_f = it_3.first; + bool find_n = false; + std::vector c_2 = it_3.second; + std::vector find_intersect; + // Temp array and vec. + // Initialize the find intersect bool list. + + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + find_intersect.push_back(false); + } + + for (const auto &it_1 : frontiers_1) { + ui v_1_f = it_1.first; + bool share_pro = false; + for (ui k_1 = 0; k_1 < data_provenance[u_f][v_1_f][pro_node].size(); k_1++) { + for (ui k_2 = 0; k_2 < data_provenance[u_nb_f][v_3_f][pro_node].size(); k_2++) { + if (data_provenance[u_f][v_1_f][pro_node][k_1] == + data_provenance[u_nb_f][v_3_f][pro_node][k_2]) { + share_pro = true; + break; + } + } + } + + if (share_pro == true) { + // int count = 0; + std::vector c_1 = it_1.second; + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + ui v_2 = c_2[l_2]; + if (find_intersect[l_2] == true) continue; + for (unsigned int v_1 : c_1) { + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(v_1, data_nbrs_count_1); + + for (ui p = 0; p < data_nbrs_count_1; p++) { + if (data_nbrs_1[p] == v_2) { + if (DEBUG) { + std::cout << "True###" << std::endl; + } + find_intersect[l_2] = true; + find_n = true; + break; + } + } + if (find_intersect[l_2] == true) { + break; + } + } + } + } + } + + for (uint64_t l_2 = 0; l_2 < c_2.size(); l_2++) { + ui v_2 = c_2[l_2]; + if (find_intersect[l_2] == false) { + for 
(uint64_t ll = 0; ll < P_Candidates[u_nb][v_3_f].size(); ll++) { + if (P_Candidates[u_nb][v_3_f][ll] == v_2) { + if (DEBUG) { + std::cout << "Erase intersect, v_3_f: " << v_3_f << std::endl; + } + P_Candidates[u_nb][v_3_f].erase(P_Candidates[u_nb][v_3_f].begin() + ll); + update_index = true; + if (DEBUG) { + std::cout << "Erase inter-Done" << std::endl; + } + break; + } + } + if (DEBUG) { + std::cout << "Current Node: " << 2 << " ; "; + for (const auto &iter : P_Candidates[2]) { + std::cout << "Vector Size: " << iter.second.size() << " "; + std::cout << "Parent node: " << iter.first << " ["; + for (unsigned int d : iter.second) { + std::cout << d << ","; + } + std::cout << " ] " << std::endl; + } + } + } + } + + if (find_n == false) { + P_Candidates[u_nb].erase(v_3_f); + if (DEBUG) { + std::cout << "Erase C-Done"; + } + } + } + if (update_index == true) { + Refinement(intersetion, order, tree, u_nb, P_Candidates, data_provenance, data_graph); + } + } + } + if (DEBUG) { + std::cout << "Refine the children::" << std::endl; + } + + // BFS reduce it's children: + + for (V_ID k = 0; k < tree[u].children_count; k++) { + update_index = false; + V_ID u_c = tree[u].children[k]; + frontiers_2 = P_Candidates[u_c]; + std::vector child; + int count = 0; + for (const auto &it : frontiers_2) { + V_ID v_f = it.first; + bool find_children = false; + for (const auto &it_1 : frontiers_1) { + if (std::find(it_1.second.begin(), it_1.second.end(), v_f) != it_1.second.end()) { + find_children = true; + break; + } + // This is all + } + if (find_children == false && P_Candidates[u_c].find(v_f) != P_Candidates[u_c].end()) { + P_Candidates[u_c].erase(v_f); + update_index = true; + } + count += 1; + } + + if (update_index == true) { + Refinement(intersetion, order, tree, u_c, P_Candidates, data_provenance, data_graph); + } + } + if (DEBUG) { + std::cout << std::endl << "end--> )))" << u << std::endl << std::endl; + } +} + +int local_cartesian(std::vector &order_index, + TreeNode *&tree, + 
std::vector &candidates_count, + ui u, + std::vector &node_tmp) { + int64_t total = 1; + int num_group = tree[u].children_count; + + if (num_group == 0) { + return candidates_count[order_index[u]]; + } + + for (int i = 0; i < num_group; i++) { + ui u_2 = tree[u].children[i]; + node_tmp.push_back(u_2); + total *= local_cartesian(order_index, tree, candidates_count, u_2, node_tmp); + // std::cout <<"Test->" << total << std::endl; + } + + return total; +} + +void cartesian_s(int *cartesian, + V_ID *&order, + std::vector order_index, + TreeNode *&tree, + std::vector &candidates_count, + ui root, + std::vector> &pattern_e, + ui &final_group, + V_ID *&order_1, + V_ID *&order_2, + ui &order_count_1, + ui &order_count_2, + ui &query_count) { + int num_group = tree[root].children_count; + std::cout << "potential partition num:" << num_group << std::endl; + + auto e_g = new int[100][100]; + + std::vector> node_l; + + int64_t total_cartesian = 1; + for (int i = 0; i < num_group; i++) { + std::vector node_tmp; + node_tmp.push_back(root); + cartesian[i] = 0; + ui u = tree[root].children[i]; + node_tmp.push_back(u); + cartesian[i] = local_cartesian(order_index, tree, candidates_count, u, node_tmp); + std::cout << " # cartesian result->" << u << "->" << cartesian[i] << std::endl; + node_l.push_back(node_tmp); + total_cartesian *= cartesian[i]; + } + + std::cout << "Finished cartesian" << std::endl; + // (1) Edge information + for (int i = 0; i < num_group; i++) { + for (int j = 0; j < num_group; j++) { + e_g[i][j] = 999; + if (i != j) { + e_g[i][j] = 0; + for (uint64_t p = 1; p < node_l[i].size(); p++) { + for (uint64_t q = 1; q < node_l[j].size(); q++) { + // std::cout <"<< i << ","<< j << + // endl; + if (order_index[node_l[i][p]] < order_index[node_l[j][q]]) { + for (uint64_t t = 0; t < pattern_e[order_index[node_l[i][p]]].size(); t++) { + if (node_l[j][q] == pattern_e[order_index[node_l[i][p]]][t]) { + e_g[i][j] += 1; + break; + } + } + } else if (order_index[node_l[i][p]] > 
order_index[node_l[j][q]]) { + for (uint64_t t = 0; t < pattern_e[order_index[node_l[j][q]]].size(); t++) { + if (node_l[i][p] == pattern_e[order_index[node_l[j][q]]][t]) { + e_g[i][j] += 1; + break; + } + } + } + } + } + } + // std::cout <<"group->" << i << ", and group->" << j << ", edge->" << e_g[i][j] << std::endl; + } + } + // (2) Devide the edge. + // Partition 1 edge. + int64_t min_e = 0; + for (int j = 1; j < num_group; j++) { + min_e += e_g[0][j]; + } + ui order_g = 0; + for (int i = 1; i < num_group; i++) { + int local_e = 0; + for (int j = 0; j < num_group; j++) { + if (i != j) { + local_e += e_g[i][j]; + } + } + int64_t tmp_t = cartesian[i] * num_group - total_cartesian; + // std::cout <<"Tmp:" << tmp << std::endl; + if (local_e < min_e && tmp_t >= 0) { + order_g = i; + min_e = local_e; + } + } + // std::cout <<"order_g->" << order_g << ", min_e:" << min_e << std::endl; + final_group = order_g; + + std::cout << "Finished prepare" << std::endl; + // (3) partition with all the cases. + + int cases = num_group / 2; + std::vector> all_balance; + std::vector p_cartesian; + std::vector p_edges; + std::vector> l_c_all; + + // There are i kinds of the combination C(1,n) ... C(i,n), C(|num_group|/2,n). 
+ // int tmp[30]; + // for (int i = 0 ; i < 30; i ++) { + // tmp[i] = 0; + // } + for (int i = 1; i <= cases; i++) { + std::vector> cases_balance; + for (int k = 0; k < num_group; k++) { + std::set s1; + s1.insert(k); + cases_balance.push_back(s1); + } + for (int j = 1; j < i; j++) { + std::vector> cases_balance_new; + for (int k = 0; k < num_group; k++) { + for (auto &t : cases_balance) { + if (t.find(static_cast(k)) == t.end()) { + std::set local_case; + local_case = t; + local_case.insert(k); + if (std::find(cases_balance_new.begin(), cases_balance_new.end(), local_case) == + cases_balance_new.end()) { + cases_balance_new.push_back(local_case); + } + } + } + } + cases_balance = cases_balance_new; + } + for (auto &j : cases_balance) { + all_balance.push_back(j); + } + } + + std::cout << "Before do" << std::endl; + if (all_balance.size() == 0) { + order_count_1 = query_count; + order_count_2 = 0; + for (ui j = 0; j < query_count; j++) { + order_1[j] = order[j]; + } + return; + } + + for (uint64_t j = 0; j < all_balance.size(); j++) { + // long double value = 0; + int64_t l_c = 0; + int64_t l_cc = 0; + int l_e = 0; + std::cout << "Group->"; + std::queue q3; + for (auto itr = all_balance[j].begin(); itr != all_balance[j].end(); itr++) { + q3.push(tree[root].children[*itr]); + l_cc *= cartesian[*itr]; + // std::cout <<"," << *itr; + for (int i = 0; i < num_group; i++) { + if (std::find(all_balance[j].begin(), all_balance[j].end(), i) == all_balance[j].end()) { + l_e += e_g[*itr][i]; + } + } + } + + while (!q3.empty()) { + // dequeue front node and print it + ui v = q3.front(); + q3.pop(); + l_c += 1; + // order_1[order_count_1++] = v; + if (tree[v].children_count > 2) { + for (ui m = 0; m < tree[v].children_count; m++) { + q3.push(tree[v].children[m]); + } + } + } + + std::cout << "total_cartesian" << total_cartesian << std::endl; + + l_c = static_cast((query_count) * (query_count) / 4) - (query_count - l_c) * (l_c); + std::tuple tmp; + std::get<0>(tmp) = l_c; + 
std::get<1>(tmp) = l_e; + std::get<2>(tmp) = j; + l_c_all.push_back(tmp); + + // std::cout <<",l_c, l_e:" << l_c << "," << l_e << std::endl; + + // value = 0; + } + + sort(l_c_all.begin(), l_c_all.end()); + + // std::cout <<"l_c->" <(l_c_all[0]) << std::endl; + // std::cout <<"Check the first one,l_c:" <(l_c_all[0]) << ",l_e:" + // <(l_c_all[0]) + // << + // endl; + + std::cout << "End all prosibility:" << all_balance.size() << std::endl; + + // Group 1 + std::vector node; + std::queue q1, q2; + + for (int i = 0; i < num_group; i++) { + // node.push_back(*all_balance[i].begin()); + + int t = std::get<2>(l_c_all[0]); + if (std::find(all_balance[t].begin(), all_balance[t].end(), i) != all_balance[t].end()) { + q1.push(tree[root].children[i]); + } else { + q2.push(tree[root].children[i]); + } + } + + order_count_1 = 0; + order_count_2 = 0; + // if() + order_1[order_count_1++] = root; + order_2[order_count_2++] = root; + + ui v; + while (!q1.empty()) { + // dequeue front node and print it + v = q1.front(); + q1.pop(); + order_1[order_count_1++] = v; + for (ui j = 0; j < tree[v].children_count; j++) { + q1.push(tree[v].children[j]); + } + } + // Group 2 + + while (!q2.empty()) { + // dequeue front node and print it + v = q2.front(); + q2.pop(); + order_2[order_count_2++] = v; + for (ui j = 0; j < tree[v].children_count; j++) { + q2.push(tree[v].children[j]); + } + } + + if (order_count_1 > order_count_2) { + for (ui k = 0; k < order_count_2; k++) { + ui tmp = order_1[k]; + order_1[k] = order_2[k]; + order_2[k] = tmp; + } + for (ui k = order_count_2; k < order_count_1; k++) { + order_2[k] = order_1[k]; + } + ui tmp = order_count_1; + order_count_1 = order_count_2; + order_count_2 = tmp; + } +} + +void pruneCandidates(Graph *data_graph, + Graph *query_graph, + V_ID query_vertex, + V_ID *pivot_vertices, + ui pivot_vertices_count, + V_ID **candidates, + ui *candidates_count, + ui *flag, + ui *updated_flag) { + L_ID query_vertex_label = 
query_graph->getVertexLabel(query_vertex); + ui query_vertex_degree = query_graph->getVertexDegree(query_vertex); + + ui count = 0; + ui updated_flag_count = 0; + + for (ui i = 0; i < pivot_vertices_count; ++i) { + V_ID pivot_vertex = pivot_vertices[i]; + + for (ui j = 0; j < candidates_count[pivot_vertex]; ++j) { + V_ID v = candidates[pivot_vertex][j]; + + if (v == INVALID_VERTEX_ID) continue; + ui v_nbrs_count; + V_ID *v_nbrs = data_graph->getVertexNeighbors(v, v_nbrs_count); + + for (ui k = 0; k < v_nbrs_count; ++k) { + V_ID v_nbr = v_nbrs[k]; + L_ID v_nbr_label = data_graph->getVertexLabel(v_nbr); + ui v_nbr_degree = data_graph->getVertexDegree(v_nbr); + + if (flag[v_nbr] == count && v_nbr_label == query_vertex_label && + v_nbr_degree >= query_vertex_degree) { + flag[v_nbr] += 1; + + if (count == 0) { + updated_flag[updated_flag_count++] = v_nbr; + } + } + } + } + count += 1; + } + + for (ui i = 0; i < candidates_count[query_vertex]; ++i) { + ui v = candidates[query_vertex][i]; + if (v == INVALID_VERTEX_ID) continue; + + if (flag[v] != count) { + candidates[query_vertex][i] = INVALID_VERTEX_ID; + } + } + + for (ui i = 0; i < updated_flag_count; ++i) { + ui v = updated_flag[i]; + flag[v] = 0; + } +} + +void Index_To_Candidate(ui **&candidates, + ui *&candidates_count, + std::vector>> &P_Candidates, + ui query_count, + ui data_count, + ui *order, + bool *visited) { + for (ui i = 0; i < query_count; i++) { + ui u = order[i]; + candidates_count[u] = 0; + std::unordered_map> frontiers = P_Candidates[u]; + + for (const auto &frontier : frontiers) { + for (unsigned int j : frontier.second) { + if (visited[j] == false) { + visited[j] = true; + candidates[u][candidates_count[u]++] = j; + } + } + } + std::fill(visited, visited + data_count, false); + } +} + +void compactCandidates(ui **&candidates, ui *&candidates_count, ui query_count) { + for (ui i = 0; i < query_count; ++i) { + V_ID query_vertex = i; + ui next_position = 0; + for (ui j = 0; j < 
candidates_count[query_vertex]; ++j) { + V_ID data_vertex = candidates[query_vertex][j]; + + if (data_vertex != INVALID_VERTEX_ID) { + candidates[query_vertex][next_position++] = data_vertex; + } + } + + candidates_count[query_vertex] = next_position; + } +} + +void intersection( + std::unordered_map> &query_provenance, + std::unordered_map>>> + &data_provenance, + V_ID &u, + std::unordered_map>> &node_provenance, + V_ID &u_p, + std::unordered_map> &frontiers_1) { + for (const auto &it_1 : frontiers_1) { + V_ID v_f = it_1.first; + for (unsigned int v : it_1.second) { + std::unordered_map> pro; + bool v_existed = false; + if (node_provenance.find(v) != node_provenance.end()) { + v_existed = true; + } + if (v_existed == false) { + for (uint64_t g = 0; g < query_provenance[u].size(); g++) { + if (u == query_provenance[u][g]) { + // add current node and v to the provenance information if this node is one of the + // required provenance ID. + std::vector pro_v; + pro_v.push_back(v); + pro.insert(std::pair>(query_provenance[u][g], pro_v)); + // std::cout <<" !!!!!!!!!!!++++intersect " << pro[query_provenance[u][g]][0] << + // endl; + } else if (data_provenance[u_p][v_f][query_provenance[u][g]].size() != 0) { + pro.insert(std::pair>( + query_provenance[u][g], data_provenance[u_p][v_f][query_provenance[u][g]])); + } + } + if (pro.size() != 0) { + node_provenance.insert( + std::pair>>(v, pro)); + } + } else { + for (uint64_t g = 0; g < query_provenance[u].size(); g++) { + if (u != query_provenance[u][g] && + data_provenance[u_p][v_f][query_provenance[u][g]].size() != 0) { + // add current node and v to the provenance information if this node is one of the + // required provenance ID. 
+ for (uint64_t ll = 0; ll < data_provenance[u_p][v_f][query_provenance[u][g]].size(); + ll++) { + if (std::find(node_provenance[v][query_provenance[u][g]].begin(), + node_provenance[v][query_provenance[u][g]].end(), + data_provenance[u_p][v_f][query_provenance[u][g]][ll]) == + node_provenance[v][query_provenance[u][g]].end()) { + node_provenance[v][query_provenance[u][g]].push_back( + data_provenance[u_p][v_f][query_provenance[u][g]][ll]); + } + } + } + } + } + } + } +} +void CheckRepartiton1(Graph *&query_graph, + ui *&order_1, + ui *&order_2, + ui &order_count_1, + ui &order_count_2, + bool &inside, + bool &outside, + ui &counnt) { + while (inside == false && outside == false && counnt < 10) { + outside = true; + counnt++; + for (ui i = 1; i < order_count_1; i++) { + inside = false; + ui nb_count; + ui *nb = query_graph->getVertexNeighbors(order_1[i], nb_count); + for (ui j = i + 1; j < order_count_1; j++) { + if (std::find(nb, nb + nb_count, order_1[j]) != nb + nb_count) { + inside = true; + break; + } + } + + if (inside == false) { + outside = false; + for (ui j = 1; j < order_count_2; j++) { + nb = query_graph->getVertexNeighbors(order_2[j], nb_count); + if (std::find(nb, nb + nb_count, order_1[i]) != nb + nb_count) { + inside = false; + outside = true; + order_2[order_count_2++] = order_1[i]; + for (ui k = i + 1; k < order_count_1; k++) { + order_1[k - 1] = order_1[k]; + } + order_count_1--; + break; + } + } + } + + if (outside == true && inside == false) { + outside = false; + break; + } + } + } +} +void CheckRepartiton2(Graph *&query_graph, + ui *&order_1, + ui *&order_2, + ui &order_count_1, + ui &order_count_2, + bool &inside, + bool &outside, + ui &counnt) { + while (inside == false && outside == false && counnt < 10) { + outside = true; + counnt++; + for (ui i = 1; i < order_count_2; i++) { + inside = false; + ui nb_count; + ui *nb = query_graph->getVertexNeighbors(order_2[i], nb_count); + for (ui j = i + 1; j < order_count_1; j++) { + if 
(std::find(nb, nb + nb_count, order_2[j]) != nb + nb_count) { + inside = true; + break; + } + } + if (inside == false) { + outside = false; + for (ui j = 1; j < order_count_1; j++) { + nb = query_graph->getVertexNeighbors(order_1[j], nb_count); + if (std::find(nb, nb + nb_count, order_2[i]) != nb + nb_count) { + inside = false; + outside = true; + order_1[order_count_1++] = order_2[i]; + for (ui k = i + 1; k < order_count_2; k++) { + order_2[k - 1] = order_2[k]; + } + order_count_2--; + break; + } + } + } + if (outside == true && inside == false) { + outside = false; + break; + } + } + } +} +bool CECIFunction(Graph *data_graph, + Graph *query_graph, + ui **&candidates, + ui *&candidates_count, + ui *&order, + ui *&provenance, + TreeNode *&tree, + std::vector>> &P_Candidates, + std::vector>> &P_Provenance) { + // std::cout <<"Initialize function: " << std::endl; + // Initial the CECI Index. + // In this case, + + // double timer_all_s = omp_get_wtime(); + + // std::cout <<"a" << std::endl; + V_ID start_vertex = InitialStartVertex(data_graph, query_graph); + + // start_vertex = 6; + // std::cout <<"Start Vertex: " << start_vertex << std::endl; + + bfs(query_graph, + start_vertex, + tree, + order, + provenance); // Build the tree structure and order from query graph + + // query_count is the number of the vertexs in the query graph. + ui query_count = query_graph->getVerticesCount(); + std::vector order_index(query_count); + + // Build vertex to the order Reverse index; + for (ui i = 0; i < query_count; ++i) { + V_ID query_vertex = order[i]; + order_index[query_vertex] = i; + } + + // In the query graph + // every query tree, build their neighbour and parents information. 
+ // std::cout <<"Begin Building Neighbourhood Informations" << std::endl; + + for (ui i = 0; i < query_count; ++i) { + V_ID u = order[i]; + // tree[u].under_level_count = 0; + tree[u].bn_count = 0; + tree[u].fn_count = 0; + ui u_nbrs_count; + + V_ID *u_nbrs = query_graph->getVertexNeighbors(u, u_nbrs_count); + for (ui j = 0; j < u_nbrs_count; ++j) { + V_ID u_nbr = u_nbrs[j]; + if (order_index[u_nbr] < order_index[u]) { + tree[u].bn[tree[u].bn_count++] = u_nbr; + } else { + tree[u].fn[tree[u].fn_count++] = u_nbr; + } + } + } + + // Initialization + ui candidates_max_count = data_graph->getGraphMaxLabelFrequency(); + + ui data_count = data_graph->getVerticesCount(); + // ui query_count = query_graph->getVerticesCount(); + ui *updated_flag = new ui[data_graph->getVerticesCount()]; + ui *flag = new ui[data_graph->getVerticesCount()]; + std::fill(flag, flag + data_graph->getVerticesCount(), 0); + + candidates_count = new ui[query_count]; + memset(candidates_count, 0, sizeof(ui) * query_count); + + candidates = new ui *[query_count]; + + ui **candidates_inter = new ui *[query_count]; + ui **candidates_index = new ui *[query_count]; + + // ui* candidates_count_inter = new ui[query_count]; + ui *candidates_count_index = new ui[query_count]; + + for (ui i = 0; i < query_count; ++i) { + candidates[i] = new ui[candidates_max_count]; + candidates_inter[i] = new ui[candidates_max_count]; + candidates_index[i] = new ui[candidates_max_count]; + } + + for (ui i = 0; i < query_count; i++) { + ZFComputeNLF( + data_graph, query_graph, order[i], candidates_count[order[i]], candidates[order[i]]); + } + std::cout << "begin" << std::endl; + + // The number of refinement is k. According to the original paper, we set k as 3. 
+ for (ui k = 0; k < 3; ++k) { + if (k % 2 == 0) { + for (ui i = 1; i < query_count; ++i) { + V_ID u = order[i]; + TreeNode &node = tree[u]; + pruneCandidates(data_graph, + query_graph, + u, + node.bn, + node.bn_count, + candidates, + candidates_count, + flag, + updated_flag); + } + } else { + for (int i = query_count - 2; i >= 0; --i) { + V_ID u = order[i]; + TreeNode &node = tree[u]; + pruneCandidates(data_graph, + query_graph, + u, + node.fn, + node.fn_count, + candidates, + candidates_count, + flag, + updated_flag); + } + } + } + + compactCandidates(candidates, candidates_count, query_count); + + for (ui j = 0; j < query_count; j++) { + std::cout << "u->" << order[j] << "Candidates_count->" << candidates_count[order[j]] + << std::endl; + } + std::cout << "Done with Filter" << std::endl; + + candidates_count_index[order[0]] = candidates_count[order[0]]; + + for (ui k = 0; k < candidates_count[order[0]]; k++) { + candidates_index[order[0]][k] = candidates[order[0]][k]; + } + + // std::cout <<"print neighbourhood function" << std::endl; + // which pair of intersection we need + std::unordered_map>> intersetion; + // Size is the length of the query graph . + std::unordered_map> query_provenance; + // Size is . + // std::vector> data_provenance; + // >>> // + // The last item in the data_provenance is corresponding to the different candidates with the same + // provenance key in the query graph. + // + + std::unordered_map>>> + data_provenance; + // This is all + // // 11 + // + for (ui i = 0; i < query_count; ++i) { + V_ID u = order[i]; + V_ID tmp_first = INVALID_VERTEX_ID; + V_ID tmp_second = INVALID_VERTEX_ID; + if (tree[u].bn_count > 1) { + // std::cout <<"Node: (" << u << ")-> "; + tmp_first = tree[u].parent; + for (ui j = 0; j < tree[u].bn_count; j++) { + // std::cout <>>::iterator its = intersetion.begin(); + + std::unordered_map> branching; + + // int i = 0; + + // all + // Check the parent node for each intersection's pair. 
+ + while (its != intersetion.end()) { + std::vector single_branch; + std::vector> inter = its->second; + + for (auto &k : inter) { + V_ID r = ParentNode(k.first, k.second, tree); + // All. + add_provenance(query_provenance, k.first, r); + add_provenance(query_provenance, k.second, r); + single_branch.push_back(r); + } + + if (single_branch.size() > 0) { + branching.insert(std::pair>(its->first, single_branch)); + } + its++; + } + + // Reverse BFS order to record what the provenance their parents need. + // bool need_add = query_count; + for (ui i = query_count - 1; i > 0; --i) { + V_ID u = order[i]; + V_ID u_p = tree[u].parent; + + if (query_provenance.find(u) != query_provenance.end()) { + if (query_provenance.find(u_p) != query_provenance.end()) { + for (uint64_t j = 0; j < query_provenance[u].size(); j++) { + if (std::find(query_provenance[u_p].begin(), + query_provenance[u_p].end(), + query_provenance[u][j]) == query_provenance[u_p].end()) { + query_provenance[u_p].push_back(query_provenance[u][j]); + } + } + + } else { // If the map is empty with index of u_p. + std::unordered_map>::iterator iter; + iter = query_provenance.begin(); + query_provenance.insert(iter, std::pair>(u_p, query_provenance[u])); + } + } + } + + // Print out the tree structure; + + // data_provenance; + + // Start doing for the data graph. + // + // Not a good one; + + // TE_Candidates construction and filtering. + + V_ID root = order[0]; + + // std::cout <<"Root is:" << root << std::endl; + + // We use the ID 0 as their candidates. 
+ + // Initialize the Buffer for the Candidate and Candidate Count: + + // std::cout <<"Initialize Max Count: " << candidates_max_count << std::endl; + + // ZFComputeNLF(data_graph, query_graph, root, candidates_count[root], candidates[root]); + + // std::cout <<"Number of Candidate: " << candidates_count[root] << std::endl; + + // No Candidates with the root + if (candidates_count[root] == 0) { + std::cout << "Build Candidate Fail" << std::endl; + return false; + } + + // Building the P_Candidates. + std::vector data_visited(data_count); + + std::fill(data_visited.begin(), data_visited.end(), 0); + + std::vector data_visited_2(data_count); + + std::fill(data_visited_2.begin(), data_visited_2.end(), 0); + + std::vector visited_query(query_count); + std::fill(visited_query.begin(), visited_query.end(), false); + + visited_query[root] = true; + + // Candidates and Provenances + P_Candidates.resize(query_count); + P_Provenance.resize(query_count); + + // Initialization of the root's Candidates + // + + std::unordered_map> root_candidate; + for (ui i = 0; i < candidates_count[root]; i++) { + std::vector tmp; + tmp.push_back(candidates[root][i]); + root_candidate.insert(std::pair>(candidates[root][i], tmp)); + } + + P_Candidates[root] = root_candidate; + + // std::cout <<"Begin BFS and intersetion Function: " << std::endl; + + // Record the start provenance in the data graph. + std::unordered_map Provenance; + std::unordered_map> + Close_provenance; // >. + + // record the index and provenance for the root. + // + + // This records according to order index. 
+ // + + if (query_provenance.find(root) != query_provenance.end()) { + std::unordered_map>> node_provenance; + for (ui i = 0; i < candidates_count[root]; i++) { + std::vector pro_v; + pro_v.push_back(candidates[root][i]); + std::unordered_map> pro; + std::unordered_map>::iterator iter = pro.begin(); + pro.insert(iter, std::pair>(root, pro_v)); + + node_provenance.insert( + std::pair>>(candidates[root][i], pro)); + } + + data_provenance.insert( + std::pair>>>( + root, node_provenance)); + } + + // return 0; + + // This one + // + + for (ui i = 1; i < query_count; ++i) { + // std::cout <<"####check candidate 4->0 is->" << candidates[4][0] << std::endl; + V_ID u = order[i]; + // TreeNode& u_node = tree[u]; + bool do_interset = false; + if (intersetion.find(u) != intersetion.end()) { + // std::cout <<"In the key of Intersetion." << std::endl; + do_interset = true; + } + // sets of frontiers and count for parent node. + + ui u_l = query_graph->getVertexLabel(u); + ui u_d = query_graph->getVertexDegree(u); + ui u_f = tree[u].parent; + + // get NLF of current node. + // std::unordered_map* u_nlf = query_graph->getVertexNLF(u); + candidates_count_index[u] = 0; + // initial each candidate_count of each node to be 0. + // V_ID* frontiers = candidates[u]; + std::unordered_map> nb_interset; + + for (ui j = 0; j < candidates_count[u]; ++j) { + V_ID v = candidates[u][j]; + data_visited_2[v] = 1; + } + + if (do_interset && 1 == 2) { + int length_inter = intersetion[u].size(); + for (int d = 0; d < length_inter; d++) { + std::vector tt; + if (visited_query[u] == true) { + // ui u_1 = intersetion[u][d].first; + ui u_2 = intersetion[u][d].second; + ui u_2_f = tree[u_2].parent; + + // v_1 and u_1 will be the parent nodes. 
+ std::unordered_map> frontiers_2 = P_Candidates[u_2]; + ui frontiers_count_2 = frontiers_2.size(); + for (auto it = nb_interset.cbegin(); it != nb_interset.cend();) { + ui v_1 = it->first; + bool find_v_1 = false; + std::unordered_map>::iterator item = frontiers_2.begin(); + std::vector tmp_inter; + for (ui k = 0; k < frontiers_count_2; ++k) { + ui v_2_f = item->first; // get the parent ID fron the Candidate index. + ui pro_node = branching[u][d]; + bool if_pro = false; + for (ui k_1 = 0; k_1 < data_provenance[u_f][v_1][pro_node].size(); k_1++) { + for (ui k_2 = 0; k_2 < data_provenance[u_2_f][v_2_f][pro_node].size(); k_2++) { + if (data_provenance[u_f][v_1][pro_node][k_1] == + data_provenance[u_2_f][v_2_f][pro_node][k_2]) { + if_pro = true; + } + } + } + if (if_pro == true) { + for (unsigned int v_2 : item->second) { + ui data_nbrs_count_2; + V_ID *data_nbrs_2 = data_graph->getVertexNeighbors(v_2, data_nbrs_count_2); + for (unsigned int p : it->second) { + for (ui q = 0; q < data_nbrs_count_2; q++) { + if (p == data_nbrs_2[q] && + find(tmp_inter.begin(), tmp_inter.end(), p) == tmp_inter.end()) { + tmp_inter.push_back(data_nbrs_2[q]); + find_v_1 = true; + } + } + } + } + } + item++; + } + if (tmp_inter.size() > 0) { + find_v_1 = true; + nb_interset[v_1] = tmp_inter; + } + if (find_v_1 == false) { + nb_interset.erase(it++); + } else { + ++it; + } + } + // Else + } else { + visited_query[u] = true; + ui u_1 = intersetion[u][d].first; + ui u_2 = intersetion[u][d].second; + ui u_1_f = u_1; + ui pro_node = branching[u][d]; + ui u_2_f = tree[u_2].parent; + if (u_1 != root) { + u_1_f = tree[u_1].parent; + } + if (DEBUG == 1) { + std::cout << u_1 << " " << u_2 << "Unvisited, Pro-->" << pro_node << std::endl; + } + std::unordered_map> frontiers_1 = P_Candidates[u_1]; + std::unordered_map> frontiers_2 = P_Candidates[u_2]; + // ui frontiers_count_1 = frontiers_1.size(); + // ui frontiers_count_2 = frontiers_2.size(); + if (u_1 == root || u_1 == pro_node) { + u_1_f = u_1; + 
for (const auto &it_1 : frontiers_1) { + std::vector c_1 = it_1.second; + for (unsigned int v_1 : c_1) { + // std::vector tmp_inter; + for (const auto &it_2 : frontiers_2) { + ui v_2_f = it_2.first; + bool if_pro = false; + for (unsigned int k_2 : data_provenance[u_2_f][v_2_f][pro_node]) { + if (v_1 == k_2) { + if_pro = true; + } + } + std::vector c_2 = it_2.second; + std::vector tmp_inter; + if (if_pro == true) { + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(v_1, data_nbrs_count_1); + + for (unsigned int v_2 : c_2) { + ui data_nbrs_count_2; + V_ID *data_nbrs_2 = data_graph->getVertexNeighbors(v_2, data_nbrs_count_2); + // label filter and degree filter should be added here + for (ui p = 0; p < data_nbrs_count_1; p++) { + for (ui q = 0; q < data_nbrs_count_2; q++) { + ui t = data_nbrs_1[p]; + // std::cout <<"Nbrs_1->" << t << "Nbrs_2->" << data_nbrs_2[q] << + // std::endl; + if (t == data_nbrs_2[q] && data_graph->getVertexLabel(t) == u_l && + data_graph->getVertexDegree(t) >= u_d) { + if (std::find(tmp_inter.begin(), tmp_inter.end(), t) == + tmp_inter.end()) { + tmp_inter.push_back(t); + } + } + } + } + } + } + + if (tmp_inter.size() > 0) { + nb_interset.insert(std::pair>(v_1, tmp_inter)); + } + } + } + } + } else { + // u_1 is neither root nor parent. 
+ for (const auto &it_1 : frontiers_1) { + ui v_1_f = it_1.first; + std::vector c_1 = it_1.second; + for (unsigned int v_1 : c_1) { + std::vector tmp_inter; + for (const auto &it_2 : frontiers_2) { + ui v_2_f = it_2.first; + bool if_pro = false; + for (ui k_1 = 0; k_1 < data_provenance[u_1_f][v_1_f][pro_node].size(); k_1++) { + for (ui k_2 = 0; k_2 < data_provenance[u_2_f][v_2_f][pro_node].size(); k_2++) { + if (data_provenance[u_1_f][v_1_f][pro_node][k_1] == + data_provenance[u_2_f][v_2_f][pro_node][k_2]) { + if_pro = true; + break; + } + } + if (if_pro) { + break; + } + } + if (if_pro) { + std::vector c_2 = it_2.second; + ui data_nbrs_count_1; + V_ID *data_nbrs_1 = data_graph->getVertexNeighbors(v_1, data_nbrs_count_1); + for (unsigned int v_2 : c_2) { + ui data_nbrs_count_2; + V_ID *data_nbrs_2 = data_graph->getVertexNeighbors(v_2, data_nbrs_count_2); + for (ui p = 0; p < data_nbrs_count_1; p++) { + for (ui q = 0; q < data_nbrs_count_2; q++) { + ui t = data_nbrs_1[p]; + if (t == data_nbrs_2[q] && data_graph->getVertexLabel(t) == u_l && + data_graph->getVertexDegree(t) >= u_d) { + if (std::find(tmp_inter.begin(), tmp_inter.end(), t) == + tmp_inter.end()) { + tmp_inter.push_back(t); + } + } + } + } + } + } + } + if (tmp_inter.size() > 0) { + nb_interset.insert(std::pair>(v_1, tmp_inter)); + } + } + } + } + } + // V_ID u_p = tree[u].parent; + } + + bool find_u = false; + + std::unordered_map>> node_provenance; + + if (query_provenance.find(u) != query_provenance.end()) { + // All. 
+ find_u = true; + } + V_ID u_p = tree[u].parent; + + for (auto &itt : nb_interset) { + V_ID v_f = itt.first; + auto tmp = P_Candidates[u].emplace(v_f, std::vector()); + for (unsigned int v : itt.second) { + if (data_visited_2[v] == 1) { + tmp.first->second.push_back(v); + if (data_visited[v] == 0) { + data_visited[v] = 1; + candidates_index[u][candidates_count_index[u]++] = v; + } + } + } + if (tmp.first->second.size() == 0) { + P_Candidates[u].erase(v_f); + + // frontiers[j] = INVALID_VERTEX_ID; + for (ui k = 0; k < tree[u_p].children_count; ++k) { + V_ID u_c = tree[u_p].children[k]; + if (visited_query[u_c]) { + P_Candidates[u_c].erase(v_f); + } + } + } + } + if (find_u) { + std::unordered_map> frontiers_1 = P_Candidates[u]; + + intersection(query_provenance, data_provenance, u, node_provenance, u_p, frontiers_1); + } + if (node_provenance.size() != 0) { + data_provenance.insert( + std::pair>>>( + u, node_provenance)); + } + // std::cout <<"tmp print it all EndEnd !!!" << std::endl; + for (ui j = 0; j < candidates_count_index[u]; ++j) { + V_ID v = candidates_index[u][j]; + data_visited[v] = 0; // reset the data_visited for the data graph. + } + } + if (visited_query[u] == false) { + // BFS function + // std::cout <<"" << std::endl; + // std::cout <<"BFS node: { " << u << " }-> "; + visited_query[u] = true; + // All + V_ID u_p = tree[u].parent; + + // Records u's label and degree. + // std::cout <<"parent is->" << u_p << std::endl; + V_ID *frontiers = candidates_index[u_p]; + ui frontiers_count = candidates_count_index[u_p]; + // std::cout <<"Frontier Candidate: "; + if (DEBUG) { + std::cout << u << " is BFS and parent node is-->" << u_p << std::endl; + std::cout << "Frontier count is --> " << frontiers_count << std::endl; + } + + // std::cout <<"parent is->" << u_p << ", frontier_count->" << frontiers_count << std::endl; + bool find_u = false; + if (query_provenance.find(u) != query_provenance.end()) { + // All. 
+ find_u = true; + } + std::unordered_map>> node_provenance; + + for (ui j = 0; j < frontiers_count; ++j) { + V_ID v_f = frontiers[j]; // for all its frontiers nodes + // std::cout <<"," << v_f ; + // If it's not a valid one for building the CECI idnex, skip + if (v_f == INVALID_VERTEX_ID) { + std::cout << " " << v_f << "is a invalid id"; + continue; + } + ui data_nbrs_count; + V_ID *data_nbrs = data_graph->getVertexNeighbors(v_f, data_nbrs_count); + // std::cout <<"V_f is->" << v_f << std::endl; + // This is for the vector of candidates. + auto tmp = P_Candidates[u].emplace(v_f, std::vector()); + // std::cout <<"VF is ->" << v_f << " "; + // P_Candidates + // bool is_provenance = false; + // std::cout <<"Neighbourhood is: "; + for (ui k = 0; k < data_nbrs_count; ++k) { + V_ID v = data_nbrs[k]; + // std::cout <is valid" << std::endl; + tmp.first->second.push_back(v); + if (data_visited[v] == 0) { + data_visited[v] = 1; + candidates_index[u][candidates_count_index[u]++] = v; + } + + if (find_u) { + if (node_provenance.find(v) == node_provenance.end()) { + // All. + std::unordered_map> pro; + for (uint64_t g = 0; g < query_provenance[u].size(); g++) { + if (u == query_provenance[u][g]) { + // add current node and v to the provenance information if this node is one of + // the required provenance ID. + std::vector pro_v; + pro_v.push_back(v); + pro.insert(std::pair>(query_provenance[u][g], pro_v)); + // std::cout <<" !!!!!!!!!!! " << pro[query_provenance[u][g]][0] << + // std::endl; + } else if (data_provenance[u_p][v_f][query_provenance[u][g]].size() != 0) { + // pass the provenance information from the parent node to its child node. 
+ // + // + if (pro.find(query_provenance[u][g]) == pro.end()) { + pro.insert(std::pair>( + query_provenance[u][g], + data_provenance[u_p][v_f][query_provenance[u][g]])); + } else { + std::vector tmp_v = data_provenance[u_p][v_f][query_provenance[u][g]]; + for (unsigned int &m : tmp_v) { + std::vector::iterator q_t = + find(node_provenance[v][query_provenance[u][g]].begin(), + node_provenance[v][query_provenance[u][g]].end(), + m); + if (q_t == node_provenance[v][query_provenance[u][g]].end()) { + node_provenance[v][query_provenance[u][g]].push_back(m); + } + } + } + } + } + if (pro.size() != 0) { + node_provenance.insert( + std::pair>>(v, pro)); + } + } else { + for (uint64_t g = 0; g < query_provenance[u].size(); g++) { + if (data_provenance[u_p][v_f][query_provenance[u][g]].size() != 0) { + // pass the provenance information from the parent node to its child node. + if (node_provenance[v].find(query_provenance[u][g]) == + node_provenance[v].end()) { + node_provenance[v].insert(std::pair>( + query_provenance[u][g], + data_provenance[u_p][v_f][query_provenance[u][g]])); + // std::cout <<"*********" << pro[query_provenance[u][g]][0] << std::endl; + } else { + std::vector tmp_v = data_provenance[u_p][v_f][query_provenance[u][g]]; + for (unsigned int &m : tmp_v) { + std::vector::iterator q_t = + find(node_provenance[v][query_provenance[u][g]].begin(), + node_provenance[v][query_provenance[u][g]].end(), + m); + if (q_t == node_provenance[v][query_provenance[u][g]].end()) { + node_provenance[v][query_provenance[u][g]].push_back(m); + } + } + } + } + } + } + } + } + } + } + + if (tmp.first->second.empty()) { + // set it as invalid + // std::cout <<" Erase it. 
"<< std::endl; + P_Candidates[u].erase(v_f); + + frontiers[j] = INVALID_VERTEX_ID; + for (ui k = 0; k < tree[u_p].children_count; ++k) { + V_ID u_c = tree[u_p].children[k]; + if (visited_query[u_c]) { + // std::cout <<"u_c is " << u_c <<", v_f is " << v_f; + P_Candidates[u_c].erase(v_f); + } + } + } + } + + if (find_u && node_provenance.size() != 0) { + data_provenance.insert( + std::pair>>>( + u, node_provenance)); + } + + if (candidates_count_index[u] == 0) { + std::cout << "Node: " << u << " Fail" << std::endl; + // std::cout <<"Pro: " << std::endl; + return false; + } + // flag reset. + for (ui j = 0; j < candidates_count_index[u]; ++j) { + V_ID v = candidates_index[u][j]; + data_visited[v] = 0; // reset the data_visited for the data graph. + } + } + + for (ui j = 0; j < candidates_count[u]; ++j) { + V_ID v = candidates[u][j]; + data_visited_2[v] = 0; + } + } + // Temp print + + if (DEBUG) { + std::cout << " ### Reverse Refinement" << std::endl; + } + + std::cout << "Refinement->" << std::endl; + + for (int i = query_count - 1; i >= 0; i--) { + // std::cout <<"Current u->"; + ui u = order[i]; + // std::cout < ######## P->" << P_Candidates[u].size() << std::endl; + } + + int all_cc = 0; + std::unordered_map> resss; + local_optimization(resss, + query_count, + order_index, + intersetion, + order, + tree, + P_Candidates, + data_provenance, + data_graph, + query_graph); + + all_cc = resss.size(); + all_cc = all_cc + 1 - 1; + // int ress = 0; + + // Enumeration + // cuTS enumeration: + // (1) + + // std::cout <<"#### all function -> ####" << std::endl << std::endl; + ui **candidates_2 = new ui *[query_count]; + int64_t **parent_offset = new int64_t *[query_count]; + int64_t *candidates_l = new int64_t[query_count]; + ui *parent_l = new ui[query_count]; + // bool* visited_d = new bool[candidates_max_count]; + + for (ui i = 0; i < query_count; ++i) { + candidates_2[i] = new ui[Q_LIMIT]; + parent_offset[i] = new int64_t[Q_LIMIT]; + parent_l[i] = 0; + candidates_l[i] = 
0; + } + // Enumeration: + // (1) Process the query graph and find out all the pattern edges information. + + std::vector> pattern_e; + // std::vector>>> data_e; + std::vector pattern_candidates; + for (ui i = 0; i < query_count - 1; ++i) { + std::vector tmp_e; + V_ID u = order[i]; + ui u_nbrs_count; + int tmp_cc = candidates_count[u]; + /*unordered_map> frontiers_1 = P_Candidates[u]; +std::cout <second.size(); + }*/ + + V_ID *u_nbrs = query_graph->getVertexNeighbors(u, u_nbrs_count); + for (ui j = 0; j < u_nbrs_count; ++j) { + V_ID u_nbr = u_nbrs[j]; + // If this node is not their parent nodes and has less index(which means haven't visited yet). + if (order_index[u_nbr] > order_index[u]) { + // Recordes the pattern edges information. + tmp_e.push_back(u_nbr); + } + } + pattern_e.push_back(tmp_e); + pattern_candidates.push_back(tmp_cc); + // std::cout <<"Node->" << u << ", Candidates:" << tmp_cc << ",order->" << i << std::endl; + } + + V_ID u_2 = order[query_count - 1]; + // ui u_nbrs_count; + ui tmp_cc = candidates_count[u_2]; + + /*unordered_map> frontiers_1 = P_Candidates[u_2]; + for (auto it_1 = frontiers_1.cbegin(); it_1 != frontiers_1.cend(); it_1 ++) { + tmp_cc += it_1->second.size(); + }*/ + + pattern_candidates.push_back(tmp_cc); + + // (2) Process the data graph to find out all the candidates for the data graph. + + // std::cout <<"end 1" << std::endl; + + // (3) + // This is all. Partition the two subgraph by calculating their cartesian && the candidates + // of edges. 
+ // Partition the query graph into two subgroup + + int cartesian[100]; + ui r = order[0]; + ui final_group; + + // (4) Enumerate local result + + ui *order_1 = new ui[100]; + ui *order_2 = new ui[100]; + ui order_count_1 = 0; + ui order_count_2 = 0; + + std::cout << "in" << std::endl; + + bool split = false; + + if (split == false) { + cartesian_s(cartesian, + order, + order_index, + tree, + pattern_candidates, + r, + pattern_e, + final_group, + order_1, + order_2, + order_count_1, + order_count_2, + query_count); + } else { + std::string myText; + + // Read from the text file + std::ifstream MyReadFile("fiedler"); + + // Use a while loop together with the getline() function to read the file line by line + // int oo = 0; + getline(MyReadFile, myText); + + // char type; + V_ID id; + L_ID class_id; + while (MyReadFile >> id) { + MyReadFile >> class_id; + + std::cout << id << "," << class_id << std::endl; + } + + // Close the file + MyReadFile.close(); + return false; + // Read the partition result. + } + + // Here is the section for the coding. 
+ // + + bool inside = false; + bool outside = false; + ui counnt = 0; + ui *order_1_d = new ui[query_count]; + ui *order_2_d = new ui[query_count]; + ui order_1_d_count = order_count_1; + ui order_2_d_count = order_count_2; + + for (ui i = 0; i < order_count_1; i++) { + order_1_d[i] = order_1[i]; + } + + for (ui i = 0; i < order_count_2; i++) { + order_2_d[i] = order_2[i]; + } + + CheckRepartiton1( + query_graph, order_1, order_2, order_count_1, order_count_2, inside, outside, counnt); + + // if (order_count_1 > 1 ) { + counnt = 0; + inside = false; + outside = false; + CheckRepartiton2( + query_graph, order_1, order_2, order_count_1, order_count_2, inside, outside, counnt); + // bool nodo = false; + if (order_count_1 <= 1 || order_count_2 <= 1) { + // nodo = true; + } + if (order_count_1 <= 2 || order_count_2 <= 2) { + // nodo = true; + std::cout << "Should not do it" << std::endl; + order_count_1 = order_1_d_count; + order_count_2 = order_2_d_count; + for (ui i = 0; i < order_count_1; i++) { + order_1[i] = order_1_d[i]; + } + for (ui i = 0; i < order_count_2; i++) { + order_2[i] = order_2_d[i]; + } + } + + static V_ID *visit_local_2 = new V_ID[data_count]; + + static bool *visited = new bool[data_count]; + std::fill(visited, visited + data_count, false); + Index_To_Candidate(candidates_index, + candidates_count_index, + P_Candidates, + query_count, + data_count, + order, + visited); + + for (ui j = 0; j < query_count; j++) { + std::cout << "u->" << order[j] << "Candidates_count->" << candidates_count_index[order[j]] + << std::endl; + } + + for (ui i = 1; i < order_count_1; i++) { + for (ui j = i + 1; j < order_count_1; j++) { + if (candidates_count_index[order_1[i]] > 1.5 * candidates_count_index[order_1[j]]) { + ui tmp10 = order_1[i]; + order_1[i] = order_1[j]; + order_1[j] = tmp10; + } + } + } + + for (ui i = 1; i < order_count_2; i++) { + for (ui j = i + 1; j < order_count_2; j++) { + if (candidates_count_index[order_2[i]] > 1.5 * 
candidates_count_index[order_2[j]]) { + ui tmp10 = order_2[i]; + order_2[i] = order_2[j]; + order_2[j] = tmp10; + } + } + } + + std::cout << "out" << std::endl; + + // std::cout <<"Test 2" << std::endl; + bool *p_3_1 = new bool[query_count]; + + bool *p_3_2 = new bool[query_count]; + + for (ui i = 0; i < query_count; i++) { + p_3_1[i] = false; + p_3_2[i] = false; + } + + // (5.1) Load edges information,std::cout <<"Load edge->" << std::endl; + for (ui i = 1; i < order_count_1; i++) { + for (ui j = 1; j < order_count_2; j++) { + // std::cout <<"Tmp:" << tmp << std::endl; + // if (order_1[i] != order_2[j]) { + if (order_index[order_2[j]] > order_index[order_1[i]]) { + ui tmp = order_index[order_1[i]]; + if (std::find(pattern_e[tmp].begin(), pattern_e[tmp].end(), order_2[j]) != + pattern_e[tmp].end()) { + p_3_1[order_1[i]] = true; + p_3_2[order_2[j]] = true; + } + } else if (order_index[order_2[j]] < order_index[order_1[i]]) { + ui tmp = order_index[order_2[j]]; + if (std::find(pattern_e[tmp].begin(), pattern_e[tmp].end(), order_1[i]) != + pattern_e[tmp].end()) { + p_3_1[order_1[i]] = true; + p_3_2[order_2[j]] = true; + } + } + + // if( order_1[i] != order_2[j] and query_graph->getVertexLabel(order_1[i]) == + // query_graph->getVertexLabel(order_2[j])) { + // p_3_1[order_1[i]] = true; + // p_3_2[order_2[j]] = true; + // } + //} + } + } + + // if (1 == 2) { + // ui move = 0; + // for (ui i = 1; i < order_count_1 - move; i++) { + // if (p_3_1[order_1[i]] == false) { + // bool doit = true; + // ui nb_count; + // ui *nb = query_graph->getVertexNeighbors(order_1[i], nb_count); + + // for (ui j = 0; j < nb_count; j++) { + // bool inorder = false; + // for (ui k = 0; k < order_count_1; k++) { + // if (order_1[k] == nb[j]) { + // inorder = true; + // } + // } + // if (nodo == false) { + // if (candidates_count_index[order_1[i]] < 1.5 * candidates_count_index[nb[j]] && + // inorder) { + // doit = false; + // p_3_1[order_1[i]] = true; + // } + // } else { + // if 
(candidates_count_index[order_1[i]] < 1.2 * candidates_count_index[nb[j]] && + // inorder) { + // doit = false; + // p_3_1[order_1[i]] = true; + // } + // } + // } + + // if (doit) { + // ui tmp2 = order_1[i]; + // move += 1; + // for (ui j = i; j < order_count_1 - 1; j++) { + // ui tmp = order_1[j]; + // order_1[j] = order_1[j + 1]; + // order_1[j + 1] = tmp; + // } + // if (order_1[i] != tmp2) { + // i -= 1; + // } + // } + // } + // } + + // move = 0; + // for (ui i = 1; i < order_count_2 - move; i++) { + // if (p_3_2[order_2[i]] == false) { + // bool doit = true; + + // std::cout << std::endl; + // ui nb_count; + // ui *nb = query_graph->getVertexNeighbors(order_2[i], nb_count); + + // for (ui j = 0; j < nb_count; j++) { + // bool inorder = false; + // for (ui k = 0; k < order_count_2; k++) { + // if (order_2[k] == nb[j]) { + // inorder = true; + // } + // } + // if (nodo == false) { + // if (candidates_count_index[order_2[i]] < 1.5 * candidates_count_index[nb[j]] && + // inorder) { + // doit = false; + // p_3_2[order_2[i]] = true; + // } + // } else { + // if (candidates_count_index[order_2[i]] < 1.2 * candidates_count_index[nb[j]] && + // inorder) { + // doit = false; + // p_3_2[order_2[i]] = true; + // } + // } + // } + + // if (doit) { + // ui tmp2 = order_2[i]; + // move += 1; + // for (ui j = i; j < order_count_2 - 1; j++) { + // ui tmp = order_2[j]; + // order_2[j] = order_2[j + 1]; + // order_2[j + 1] = tmp; + // } + // if (order_2[i] != tmp2) { + // i -= 1; + // } + // } + // } + // } + // } + + static V_ID **res_1 = new V_ID *[query_count]; + static V_ID **res_2 = new V_ID *[query_count]; + static V_ID **res_1_n = new V_ID *[query_count]; + static V_ID **res_2_n = new V_ID *[query_count]; + + static int64_t **parent_offset_1 = new int64_t *[query_count]; + static int64_t **children_offset_1 = new int64_t *[query_count]; + static int64_t **children_offset_2 = new int64_t *[query_count]; + static int64_t **parent_offset_2 = new int64_t *[query_count]; + 
+ static int64_t **parent_offset_1_n = new int64_t *[query_count]; + static int64_t **parent_offset_2_n = new int64_t *[query_count]; + + int64_t *candidates_l_1 = new int64_t[query_count]; + int64_t *candidates_l_2 = new int64_t[query_count]; + + int64_t *candidates_l_1_n = new int64_t[query_count]; + int64_t *candidates_l_2_n = new int64_t[query_count]; + + for (ui i = 0; i < query_count; ++i) { + res_1[i] = new V_ID[Q_LIMIT]; + res_2[i] = new V_ID[Q_LIMIT]; + res_1_n[i] = new V_ID[Q_LIMIT]; + res_2_n[i] = new V_ID[Q_LIMIT]; + + parent_offset_1[i] = new int64_t[Q_LIMIT]; + parent_offset_2[i] = new int64_t[Q_LIMIT]; + + parent_offset_1_n[i] = new int64_t[Q_LIMIT]; + parent_offset_2_n[i] = new int64_t[Q_LIMIT]; + + children_offset_1[i] = new int64_t[Q_LIMIT]; + children_offset_2[i] = new int64_t[Q_LIMIT]; + + candidates_l_1[i] = 0; + candidates_l_2[i] = 0; + + candidates_l_1_n[i] = 0; + candidates_l_2_n[i] = 0; + } + + // int NTHREADS = 1, nthreads; + int64_t total_result = 0; + // double sum[NTHREADS]; + // double timer_start1 = omp_get_wtime(); + // omp_set_num_threads(NTHREADS); + + std::vector order_index_1(query_count); + std::vector order_index_2(query_count); + + // std::cout <<"Group original->"; + for (ui i = 0; i < query_count; i++) { + ui query_vertex = order[i]; + std::cout << query_vertex << ","; + order_index_1[query_vertex] = 99999; + order_index_2[query_vertex] = 99999; + } + + std::cout << "Group 1->" << std::endl; + + for (ui i = 0; i < order_count_1; i++) { + ui query_vertex = order_1[i]; + std::cout << query_vertex << ","; + order_index_1[query_vertex] = i; + } + + std::cout << std::endl; + + std::cout << "Group 2->" << std::endl; + for (ui i = 0; i < order_count_2; i++) { + ui query_vertex = order_2[i]; + std::cout << query_vertex << ","; + order_index_2[query_vertex] = i; + } + + std::cout << std::endl; + + // static bool *visit_all = new bool[data_count*data_count]; + // std::fill(visit_all, visit_all+data_count*data_count, false); + /* + 
for(ui i=0; i < data_count; i++) { + ui data_count_1; + V_ID *data_nb = data_graph->getVertexNeighbors(i, data_count_1); + for(ui j=0; j < data_count; j++) { + visit_all[i*data_count + j] = true; + } + }*/ + + // Morphism of partition 1. + static bool morphism_1[32]; + + for (ui i = 0; i < order_count_1; i++) { + morphism_1[i] = false; + for (ui j = 0; j < order_count_1; j++) { + if (i != j && + query_graph->getVertexLabel(order_1[i]) == query_graph->getVertexLabel(order_1[j])) { + morphism_1[i] = true; + break; + } + } + } + + // Morphism of partition 2. + /* + static bool morphism_2[32]; + for (ui i = 0; i < order_count_2; i ++) { + morphism_2[i] = false; + for (ui j = 0; j < order_count_2; j ++) { + if (i!=j and query_graph->getVertexLabel(order_2[i]) == + query_graph->getVertexLabel(order_2[j])) { morphism_2[i] = true; break; + } + } + } + */ + + std::cout << "Middle->" << std::endl; + // Connection and offset. + // static V_ID *visit_local_2 = new V_ID[data_count]; + + static V_ID *connection = new V_ID[query_count * query_count]; + + static V_ID *offset = new V_ID[query_count]; + + for (ui i = 0; i < query_count; ++i) { + offset[i] = 0; + if (i != 0) { + offset[i] = offset[i - 1]; + } + V_ID u = order[i]; + // V_ID u_p = tree[u].parent; + ui u_nbrs_count; + V_ID *u_nbrs = query_graph->getVertexNeighbors(u, u_nbrs_count); + for (ui j = 0; j < u_nbrs_count; ++j) { + V_ID u_nbr = u_nbrs[j]; + // If this node is not their parent nodes and has less index(which means haven't visited yet). 
+ if (order_index[u_nbr] < order_index[u]) { + connection[offset[i]] = order_index[u_nbr]; + offset[i] += 1; + } + /*if (order_index[u_nbr] > order_index[u]) { + connection[offset[i]] = order_index[u_nbr]; + offset[i] += 1; + }*/ + } + } + + static V_ID *connection_1 = new V_ID[query_count * query_count]; + static V_ID *offset_1 = new V_ID[query_count]; + for (ui i = 0; i < order_count_1; ++i) { + offset_1[i] = 0; + if (i != 0) { + offset_1[i] = offset_1[i - 1]; + } + + V_ID u = order_1[i]; + // V_ID u_p = tree[u].parent; + ui u_nbrs_count; + V_ID *u_nbrs = query_graph->getVertexNeighbors(u, u_nbrs_count); + for (ui j = 0; j < u_nbrs_count; ++j) { + V_ID u_nbr = u_nbrs[j]; + // If this node is not their parent nodes and has less index(which means haven't visited yet). + if (order_index_1[u_nbr] < order_index_1[u]) { + connection_1[offset_1[i]] = order_index_1[u_nbr]; + offset_1[i] += 1; + } + /*if (order_index[u_nbr] > order_index[u]) { + connection[offset[i]] = order_index[u_nbr]; + offset[i] += 1; + }*/ + } + } + + static V_ID *connection_2 = new V_ID[query_count * query_count]; + static V_ID *offset_2 = new V_ID[query_count]; + for (ui i = 0; i < order_count_2; ++i) { + offset_2[i] = 0; + if (i != 0) { + offset_2[i] = offset_2[i - 1]; + } + V_ID u = order_2[i]; + // V_ID u_p = tree[u].parent; + ui u_nbrs_count; + V_ID *u_nbrs = query_graph->getVertexNeighbors(u, u_nbrs_count); + for (ui j = 0; j < u_nbrs_count; ++j) { + V_ID u_nbr = u_nbrs[j]; + // If this node is not their parent nodes and has less index(which means haven't visited yet). 
+ if (order_index_2[u_nbr] < order_index_2[u]) { + connection_2[offset_2[i]] = order_index_2[u_nbr]; + offset_2[i] += 1; + } + } + } + + int64_t res_all_1 = 0; + ui u = order_1[0]; + static V_ID res_r_1[32]; + + std::cout << "Before" << std::endl; + + auto f_4 = std::chrono::steady_clock::now(); + + // Local Enumeration 1 + u = order_1[0]; + // total_1 = 0; + // int current_order = 0; + std::vector local_4; + // std::vector<>isll + + ui u_1_0 = order_1[0]; + for (ui i = 0; i < candidates_count[u_1_0]; i++) { + res_1[0][i] = candidates[u_1_0][i]; + res_all_1 += 1; + } + candidates_l_1[0] = res_all_1; + + // (5.1) Load edges information,std::cout <<"Load edge->" << std::endl; + // std::cout <<"Test 2" << std::endl; + std::vector> p_e; + + for (ui i = 1; i < order_count_1; i++) { + for (ui j = 1; j < order_count_2; j++) { + // std::cout <<"Tmp:" << tmp << std::endl; + // if (order_1[i] != order_2[j]) { + if (order_index[order_2[j]] > order_index[order_1[i]]) { + ui tmp = order_index[order_1[i]]; + if (std::find(pattern_e[tmp].begin(), pattern_e[tmp].end(), order_2[j]) != + pattern_e[tmp].end()) { + p_e.emplace_back(i, j); + } + } else if (order_index[order_2[j]] < order_index[order_1[i]]) { + ui tmp = order_index[order_2[j]]; + if (std::find(pattern_e[tmp].begin(), pattern_e[tmp].end(), order_1[i]) != + pattern_e[tmp].end()) { + p_e.emplace_back(i, j); + } + } + //} + } + } + // std::cout << "This" << p_e.size() << std::endl; + + // (5.2) Load isomorphism information. + // . . . 
+ std::vector> i_e; + + for (ui i = 1; i < order_count_1; i++) { + for (ui j = 1; j < order_count_2; j++) { + if (order_1[i] != order_2[j] && + query_graph->getVertexLabel(order_1[i]) == query_graph->getVertexLabel(order_2[j])) { + i_e.emplace_back(i, j); + } + } + } + + L_ID *l_1 = new L_ID[order_count_1]; + L_ID *l_2 = new L_ID[order_count_2]; + bool *mor = new bool[order_count_2]; + + V_ID *con_2 = new V_ID[order_count_2]; + + V_ID *con_2_2 = new V_ID[order_count_1]; + + for (ui i = 0; i < order_count_1; i++) { + l_1[i] = query_graph->getVertexLabel(order_1[i]); + con_2_2[i] = 0; + } + + for (ui i = 0; i < order_count_2; i++) { + con_2[i] = 0; + mor[i] = false; + l_2[i] = query_graph->getVertexLabel(order_2[i]); + for (ui j = 0; j < i; j++) { + if (l_2[i] == l_2[j]) { + mor[i] = true; + mor[j] = true; + } + } + } + + std::vector q_all; + ui p_1 = 0; + ui q_1 = 0; + for (auto &t : p_e) { + ui p = t.first; + ui q = t.second; + con_2[q] += 1; + con_2_2[p] += 1; + + if (p > p_1) { + p_1 = p; + } + if (q > q_1) { + q_1 = q; + } + } + + for (auto &t : i_e) { + // std::cout <<"p_1 and i_e[t].first" << p_1 << " " << i_e[t].first << std::endl; + if (t.first > p_1) { + p_1 = t.first; + } + if (t.second > q_1) { + q_1 = t.second; + } + } + + ui p_1_1 = p_1 + 1; + ui q_1_1 = q_1 + 1; + + static bool morphism[32]; + + for (ui i = 0; i < query_count; i++) { + morphism[i] = false; + for (ui j = 0; j < query_count; j++) { + if (i != j && + query_graph->getVertexLabel(order[i]) == query_graph->getVertexLabel(order[j])) { + morphism[i] = true; + break; + } + } + } + + auto f_5 = std::chrono::steady_clock::now(); + + if (order_count_2 <= 1) { + ui bfs_order_1 = 1; + + std::cout << "Enumerate 1" << std::endl; + enumeration_bfs2(visited, + res_all_1, + candidates_index, + candidates_count_index, + order, + tree, + res_r_1, + bfs_order_1, + query_count, + query_graph, + data_graph, + order_index, + connection, + offset, + res_1, + parent_offset_1, + candidates_l_1, + visit_local_2, + 
candidates_max_count, + morphism); + + int64_t embedding = exploreGraphQLStyle(data_graph, + query_graph, + candidates_index, + candidates_count_index, + order, + 10000000, + res_all_1); + + f_5 = std::chrono::steady_clock::now(); + + std::cout << embedding << "," << 0 << "," + << std::chrono::duration_cast(f_5 - f_4).count() / + 1000000000.0 + << "," + << std::chrono::duration_cast(f_5 - f_4).count() / + 1000000000.0 + << "," << res_all_1 << std::endl; + + // freopen("output.txt", "a", stdout); + // std::cout <(f_5 - f_4).count()/1000000000.0 << + // "," <(f_5 - f_4).count()/1000000000.0 << + // "," << res_all_1 << std::endl; fclose(stdout); + return true; + + } else { + ui bfs_order_1 = 1; + + std::cout << "Enumerate 1" << std::endl; + enumeration_bfs2(visited, + res_all_1, + candidates_index, + candidates_count_index, + order_1, + tree, + res_r_1, + bfs_order_1, + order_count_1, + query_graph, + data_graph, + order_index_1, + connection_1, + offset_1, + res_1, + parent_offset_1, + candidates_l_1, + visit_local_2, + candidates_max_count, + morphism_1); + + f_5 = std::chrono::steady_clock::now(); + } + std::cout << "Done with enumeration 1" << std::endl; + + auto f_6 = std::chrono::steady_clock::now(); + + static V_ID *visit_local_4 = new V_ID[data_count * query_count]; + std::fill(visit_local_4, visit_local_4 + data_count * query_count, 0); + + // (5) Parallel combination. + // In this section, we are using the parallel method. 
+ + static int *res_s = new int[Q_LIMIT]; + + static bool **valid_1 = new bool *[query_count]; + static bool **valid_2 = new bool *[query_count]; + + for (ui i = 0; i < query_count; i++) { + valid_1[i] = new bool[candidates_l_1[order_count_1 - 1]]; + valid_2[i] = new bool[candidates_l_2[order_count_2 - 1]]; + std::fill(valid_1[i], valid_1[i] + candidates_l_1[order_count_1 - 1], false); + std::fill(valid_2[i], valid_2[i] + candidates_l_2[order_count_2 - 1], false); + } + + std::cout << "Test" << std::endl; + for (ui i = 0; i < candidates_l_1[order_count_1 - 1]; i++) { + valid_1[order_count_1 - 1][i] = true; + } + + for (ui i = 0; i < candidates_l_2[order_count_2 - 1]; i++) { + valid_2[order_count_2 - 1][i] = true; + } + + reverse_cuts(res_1, + parent_offset_1, + children_offset_1, + candidates_l_1, + order_count_1, + res_1_n, + parent_offset_1_n, + candidates_l_1_n, + valid_1); + reverse_cuts(res_2, + parent_offset_2, + children_offset_2, + candidates_l_2, + order_count_2, + res_2_n, + parent_offset_2_n, + candidates_l_2_n, + valid_2); + + // Print Children information:: + for (ui i = 0; i < query_count; i++) { + ui tmp_u = order[i]; + std::cout << tmp_u << ",children:["; + for (ui j = 0; j < tree[tmp_u].children_count; j++) { + std::cout << "," << tree[tmp_u].children[j]; + } + ui u_nbrs_count_1; + V_ID *u_nbrs_1 = query_graph->getVertexNeighbors(tmp_u, u_nbrs_count_1); + + std::cout << "]" << std::endl; + std::cout << "Neighborhood:["; + for (ui j = 0; j < u_nbrs_count_1; j++) { + std::cout << "," << u_nbrs_1[j]; + } + std::cout << "]" << std::endl; + } + + V_ID **res_1_r = new V_ID *[candidates_l_1[order_count_1 - 1]]; + V_ID **res_2_r = new V_ID *[candidates_l_2[order_count_2 - 1]]; + + for (int64_t i = 0; i < candidates_l_1[order_count_1 - 1]; i++) { + res_1_r[i] = new V_ID[order_count_1]; + for (ui j = 0; j < order_count_1; j++) { + res_1_r[i][j] = 0; + } + } + + std::cout << "Initial Test" << std::endl; + + for (int64_t i = 0; i < candidates_l_1[order_count_1 
- 1]; i++) { + res_1_r[i][order_count_1 - 1] = res_1[order_count_1 - 1][i]; + int64_t t = i; + for (int j = order_count_1 - 2; j >= 0; j--) { + t = parent_offset_1[j + 1][t]; + res_1_r[i][j] = res_1[j][t]; + } + } + std::cout << "End point 1:" << std::endl; + for (int64_t i = 0; i < candidates_l_2[order_count_2 - 1]; i++) { + res_2_r[i] = new V_ID[order_count_2]; + } + for (int64_t i = 0; i < candidates_l_2[order_count_2 - 1]; i++) { + res_2_r[i][order_count_2 - 1] = res_2[order_count_2 - 1][i]; + int64_t t = i; + for (int j = order_count_2 - 2; j >= 0; j--) { + t = parent_offset_2[j + 1][t]; + res_2_r[i][j] = res_2[j][t]; + } + } + std::cout << std::endl << "Test all" << std::endl; + + bool *visit_q_1 = new bool[order_count_1]; + + std::fill(visit_q_1, visit_q_1 + order_count_1, false); + + p_1 = p_1_1 - 1; + std::cout << order_count_1 - p_1 << std::endl; + std::vector p_all; + for (ui i = 0; i < order_count_2; i++) { + if (con_2[i] > 0) { + q_all.push_back(i); + } + } + + for (ui i = 0; i < order_count_1; i++) { + if (con_2_2[i] > 0) { + p_all.push_back(i); + } + } + + int64_t call_count = 0; + + for (int64_t d_1 = 0; d_1 < candidates_l_1[order_count_1 - 1]; d_1++) { + res_s[d_1] = 0; + } + + std::cout << "I->" << candidates_l_1[order_count_1 - 1] << std::endl; + std::cout << "J->" << candidates_l_2[order_count_2 - 1] << std::endl; + + // double timer_middle = omp_get_wtime(); + int64_t total_sum = 0; + V_ID res_1_1[32]; + + // double timer_local_enumeration = omp_get_wtime(); + + bool *visited_j = new bool[candidates_l_2[order_count_2 - 1]]; + for (int64_t k = 0; k < candidates_l_2[order_count_2 - 1]; k++) { + visited_j[k] = false; + } + + if (candidates_l_1[order_count_1 - 1] < candidates_l_2[order_count_2 - 1]) { + for (int64_t d_1 = 0; d_1 < candidates_l_1[0]; d_1++) { + int64_t start_1, end_1; + ui order_s = 0; + find_children_range(children_offset_1, candidates_l_1, p_1_1, order_s, start_1, end_1, d_1); + // V_ID r_1,r_2; + int64_t d_2; + for (d_2 = 0; d_2 
< candidates_l_2[0]; d_2++) { + if (res_1[0][d_1] == res_2[0][d_2]) { + break; + } + } + // std::cout <<"d_1" << std::endl; + for (int64_t i = start_1; i < end_1; i++) { + res_1_1[p_1] = res_1[p_1][i]; + int64_t j2 = i; + for (int ii = p_1; ii > 0; ii--) { + j2 = parent_offset_1[ii][j2]; + res_1_1[ii - 1] = res_1[ii - 1][j2]; + } + ui p2, q2; + for (auto &t : i_e) { + p2 = t.first; + visited[res_1_1[p2]] = true; + } + for (auto &t : p_e) { + p2 = t.first; + q2 = t.second; + int64_t tt = data_count * q2; + std::unordered_map> *v_nl = data_graph->getVertexNL(res_1_1[p2]); + std::vector &tmp = (*v_nl)[l_2[q2]]; + for (unsigned int m : tmp) { + visit_local_4[tt + m] += 1; + } + } + int64_t total = 0; + bool all_exist = false; + int64_t start_2[32], end_2[32]; + ui t, q, q_add; + t = 0; + int64_t call_count_2 = 0; + if (t < q_all.size()) { + q = q_all[t]; + q_add = q + 1; + int64_t tmp = data_count * q; + // Find the range of children + find_children_range( + children_offset_2, candidates_l_2, q_add, order_s, start_2[t], end_2[t], d_2); + if (q_add == order_count_2) { + std::cout << con_2[q] << "," << q << std::endl; + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + V_ID *res_t_2 = res_2_r[j]; + call_count_2 += 1; + if (visit_local_4[tmp + res_t_2[q]] >= con_2[q]) { + all_exist = true; + for (auto &t_2 : i_e) { + ui q_3 = t_2.second; + if (visited[res_t_2[q_3]]) { + all_exist = false; + break; + } + } + total += all_exist; + } + } + } else { + // std::cout << "t" << std::endl; + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + call_count_2++; + if (visit_local_4[tmp + res_2[q][j]] == con_2[q]) { + total += edge_next(res_2_r, + visited_j, + visit_local_4, + data_count, + con_2, + visited, + children_offset_2, + candidates_l_2, + q_add, + j, + order_count_2, + i_e, + q_all, + t, + start_2, + end_2, + res_2, + q_1, + call_count_2); + } + } + } + } else { + // bool all_exist; + find_children_range( + children_offset_2, candidates_l_2, order_count_2, order_s, 
start_2[t], end_2[t], d_2); + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + V_ID *res_t_2 = res_2_r[j]; + call_count_2 += 1; + all_exist = true; + for (auto &t_2 : i_e) { + call_count_2 += 1; + ui q_3 = t_2.second; + if (visited[res_t_2[q_3]]) { + all_exist = false; + break; + } + } + total += all_exist; + } + } + // #pragma omp end parallel for simd + if (p_1_1 == order_count_1) { + total_sum += total; + call_count += call_count_2; + } else { + int64_t start_1_1; + int64_t end_1_1; + int64_t pp = i; + find_children_range( + children_offset_1, candidates_l_1, order_count_1, p_1, start_1_1, end_1_1, pp); + // std::cout < " << tt << std::endl; + } + + for (auto &t2 : p_e) { + p2 = t2.first; + q2 = t2.second; + int64_t tt = data_count * q2; + std::unordered_map> *v_nl = data_graph->getVertexNL(res_1_1[p2]); + std::vector &tmp = (*v_nl)[l_2[q2]]; + for (unsigned int m : tmp) { + visit_local_4[tt + m] = 0; + } + } + + for (auto &t2 : i_e) { + p2 = t2.first; + visited[res_1_1[p2]] = false; + } + } + } + } else { + // Fix comment 1 + ui tmp_p_q = p_1_1; + p_1_1 = q_1_1; + q_1_1 = tmp_p_q; + + tmp_p_q = p_1; + p_1 = q_1; + q_1 = tmp_p_q; + + for (auto &t : i_e) { + ui tmp11 = t.first; + t.first = t.second; + t.second = tmp11; + } + for (auto &t : p_e) { + ui tmp11 = t.first; + t.first = t.second; + t.second = tmp11; + } + + for (int64_t d_1 = 0; d_1 < candidates_l_2[0]; d_1++) { + int64_t start_1, end_1; + ui order_s = 0; + find_children_range(children_offset_2, candidates_l_2, p_1_1, order_s, start_1, end_1, d_1); + // V_ID r_1,r_2; + int64_t d_2; + for (d_2 = 0; d_2 < candidates_l_1[0]; d_2++) { + if (res_2[0][d_1] == res_1[0][d_2]) { + break; + } + } + + // std::cout <<"d_1" << std::endl; + for (int64_t i = start_1; i < end_1; i++) { + res_1_1[p_1] = res_2[p_1][i]; + + for (int ii = p_1; ii > 0; ii--) { + int64_t j = i; + j = parent_offset_2[ii][j]; + res_1_1[ii - 1] = res_2[ii - 1][j]; + } + ui p2, q2; + for (auto &t : i_e) { + p2 = t.first; + 
visited[res_1_1[p2]] = true; + } + for (auto &t : p_e) { + p2 = t.first; + q2 = t.second; + int64_t tt = data_count * q2; + std::unordered_map> *v_nl = data_graph->getVertexNL(res_1_1[p2]); + std::vector &tmp = (*v_nl)[l_1[q2]]; + for (unsigned int m : tmp) { + visit_local_4[tt + m] += 1; + } + } + // Fix comment 2 + int64_t total = 0; + bool all_exist = false; + int64_t start_2[32], end_2[32]; + ui t, q, q_add; + t = 0; + int64_t call_count_2 = 0; + if (t < p_all.size()) { + q = p_all[t]; + q_add = q + 1; + int64_t tmp = data_count * q; + find_children_range( + children_offset_1, candidates_l_1, q_add, order_s, start_2[t], end_2[t], d_2); + if (q_add == order_count_1) { + std::cout << con_2_2[q] << "," << q << std::endl; + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + V_ID *res_t_2 = res_1_r[j]; + call_count_2 += 1; + if (visit_local_4[tmp + res_t_2[q]] >= con_2_2[q]) { + all_exist = true; + for (auto &t_2 : i_e) { + ui q_3 = t_2.second; + if (visited[res_t_2[q_3]]) { + all_exist = false; + break; + } + } + total += all_exist; + } + } + } else { + // std::cout <<"all-->" << std::endl; + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + call_count_2++; + if (visit_local_4[tmp + res_1[q][j]] == con_2_2[q]) { + total += edge_next(res_1_r, + visited_j, + visit_local_4, + data_count, + con_2_2, + visited, + children_offset_1, + candidates_l_1, + q_add, + j, + order_count_1, + i_e, + p_all, + t, + start_2, + end_2, + res_1, + q_1, + call_count_2); + } + } + } + } else { + // + find_children_range( + children_offset_1, candidates_l_1, order_count_1, order_s, start_2[t], end_2[t], d_2); + for (int64_t j = start_2[t]; j < end_2[t]; j++) { + V_ID *res_t_2 = res_1_r[j]; + call_count_2 += 1; + all_exist = true; + for (auto &t_2 : i_e) { + call_count_2 += 1; + ui q_3 = t_2.second; + if (visited[res_t_2[q_3]]) { + all_exist = false; + break; + } + } + total += all_exist; + } + } + // #pragma omp end parallel for simd + if (p_1_1 == order_count_2) { + total_sum += total; 
+ call_count += call_count_2; + } else { + int64_t start_1_1; + int64_t end_1_1; + int64_t pp = i; + find_children_range( + children_offset_2, candidates_l_2, order_count_2, p_1, start_1_1, end_1_1, pp); + // std::cout < " << tt << std::endl; + } + + for (t = 0; t < p_e.size(); t++) { + p2 = p_e[t].first; + q2 = p_e[t].second; + int64_t tt = data_count * q2; + std::unordered_map> *v_nl = data_graph->getVertexNL(res_1_1[p2]); + std::vector &tmp = (*v_nl)[l_1[q2]]; + for (unsigned int m : tmp) { + visit_local_4[tt + m] = 0; + } + } + + for (t = 0; t < i_e.size(); t++) { + p2 = i_e[t].first; + visited[res_1_1[p2]] = false; + } + } + } + } + // std::cout <<"Total 2:" << total_2 << std::endl; + std::cout << "Data count" << data_count << std::endl; + total_result = total_sum; + + // double timer_end = omp_get_wtime(); + // double timer_took = + // timer_end - timer_middle + + // std::chrono::duration_cast(f_5 - f_4).count() / 1000000000.0 + + // std::chrono::duration_cast(f_6 - f_5).count() / + // 1000000000.0; // timer_start1; + // double timer_took3 = timer_took; + + std::cout << "call count->" << call_count << ", per call nanoseconds->" << std::endl; + //<< timer_took * 1000000000.0 / call_count << std::endl; + std::cout + << "total results:" << total_result + << std::endl; // ",Time:" << timer_took << ",local enumeration-->" + // << (timer_local_enumeration - timer_start1) * 100 / timer_took << std::endl; + + // (6) Global node based-enumeration result comparasion + // std::cout <<"Start enumeration" << endl; + u = order[0]; + + static V_ID **res = new V_ID *[query_count]; + static V_ID **cuTS = new V_ID *[query_count]; + + for (ui i = 0; i < query_count; ++i) { + res[i] = new V_ID[candidates_max_count]; + cuTS[i] = new V_ID[candidates_max_count]; + } + + int64_t res_all = 0; + + // int64_t total = 0; + + std::fill(visited, visited + data_count, false); + + // double timer_start = omp_get_wtime(); + auto first = std::chrono::steady_clock::now(); + + auto f_2 = first; 
+ auto f_3 = first; + auto f_10 = first; + // traditional enumeration: + + f_10 = std::chrono::steady_clock::now(); + + if (tree[u].children_count == 0) { + int length = P_Candidates[u].size(); + for (int i = 0; i < length; i++) { + res_all += 1; + } + } else { + // ui current_order = 0; + ui bfs_order = 0; + query_count -= 1; + // int64_t single_res; + std::cout << "Start-->" << std::endl; + int64_t bfs_count = 0; + + for (const auto &it4 : P_Candidates[u]) { + res_all += 1; + // [current_order] = it4->first; + candidates_2[bfs_order][bfs_count++] = it4.first; + visited[it4.first] = true; + visited[it4.first] = false; + } + + f_2 = std::chrono::steady_clock::now(); + + candidates_l[bfs_order++] = bfs_count; + query_count += 1; + f_3 = std::chrono::steady_clock::now(); + } + + // auto second =std::chrono::steady_clock::now(); + std::cout << "P_1 Time->" + << std::chrono::duration_cast(f_5 - f_4).count() / + 1000000000.0 + << std::endl; + std::cout << "P_2 Time->" + << std::chrono::duration_cast(f_6 - f_5).count() / + 1000000000.0 + << std::endl; + std::cout << "DFS Time->" + << std::chrono::duration_cast(f_2 - first).count() / + 1000000000.0 + << std::endl; + + // timer_end = omp_get_wtime(); + std::cout << "BFS->" + << std::chrono::duration_cast(f_3 - f_2).count() / + 1000000000.0 + << std::endl; + // double last_r = timer_took; + // timer_took = timer_end - timer_start; + // std::cout << "Node total results:" << candidates_l[query_count - 1] - total_result + // << ",Time:" << timer_took << ", ratio->" + // << timer_took * 100 / (timer_took + timer_start1 - timer_all_s) << " %, Accelera + // rate->" + // << timer_took / last_r << std::endl; + + // freopen("output.txt", "a", stdout); + + // std::cout <(f_3 - f_2).count()/1000000000.0 + // << "," + // << call_count << std::endl; + + // fclose(stdout); + + return true; + // NTE Tree: +} +// +// +// +// NLF data_graph, query_graph, ID of the vertex node. total count. 
+// Counts, and stores in tmp when it is non-null, the data vertices that pass i's filters.
+
+void ZFComputeNLF(Graph *data_graph, Graph *query_graph, V_ID i, ui &count, ui *tmp) {
+  // Filter keys of query vertex i: its label and its degree.
+  L_ID label = query_graph->getVertexLabel(i);
+  ui degree = query_graph->getVertexDegree(i);
+  // Label -> count map of i (presumably neighbour-label frequencies; confirm in graph.h).
+  std::unordered_map *query_nlf = query_graph->getVertexNLF(i);
+
+  // data_v_count is passed uninitialised; getVerticesByLabel() is expected to set it.
+  ui data_v_count;
+  ui *data_v = data_graph->getVerticesByLabel(label, data_v_count);
+  count = 0;
+  // Keep v only if it passes the degree test and then the per-label count test.
+  for (ui j = 0; j < data_v_count; ++j) {
+    ui v = data_v[j];
+    if (data_graph->getVertexDegree(v) >= degree) {
+      // NLF check: v must offer every label that i requires, at least as many times.
+      std::unordered_map *data_nlf = data_graph->getVertexNLF(v);
+
+      if (data_nlf->size() >= query_nlf->size()) {  // cheap reject: fewer distinct labels than i
+        bool valid = true;
+
+        // item = (label, required count); `auto item` copies each map entry.
+        for (auto item : *query_nlf) {
+          auto element = data_nlf->find(item.first);
+          if (element == data_nlf->end() || element->second < item.second) {
+            valid = false;
+            break;
+          }
+        }
+
+        if (valid) {
+          if (tmp != nullptr) {
+            tmp[count] = v;  // no bounds check here: the caller must size tmp for every match
+          }
+          count += 1;  // incremented even when tmp is null, so a null tmp gives a count-only pass
+        }
+      }
+    }
+  }
+}
diff --git a/src/graph/executor/subgraph_provenance/subgraph.h b/src/graph/executor/subgraph_provenance/subgraph.h
new file mode 100644
index 00000000000..ee16377958d
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/subgraph.h
@@ -0,0 +1,42 @@
+// Copyright [2022]
+#ifndef SUBGRAPH_H
+#define SUBGRAPH_H
+
+#include  // NOTE(review): the header name is missing from this text; restore it
+
+#include "graph.h"
+#include "trees.h"
+
+bool CECIFunction(Graph *data_graph,  // the visible end of the definition returns true
+                  Graph *query_graph,
+                  ui **&candidates,  // this and every later parameter is taken by reference
+                  ui *&candidates_count,
+                  ui *&order,  // the definition reads order[0] as its start vertex u
+                  ui *&provenance,
+                  TreeNode *&tree,  // indexed by query vertex: tree[u].children_count is read
+                  std::vector>> &P_Candidates,  // P_Candidates[u] entries expose .first
+                  std::vector>> &P_Provenance);
+
+// static void FilterProvenance(std::unordered_map *u_nlf,
+//                              ui u_l,
+//                              ui u_d,
+//                              V_ID &u,
+//                              V_ID &v,
+//                              std::unordered_map &Provenance,
+//                              Graph *data_graph,
+//                              std::vector &data_visited,
+//                              ui **&candidates,
+//                              ui *&candidates_count);
+// static void Insertion(V_ID u,
+//                       V_ID &tmp_first,
+//                       V_ID &tmp_second,
+//                       std::unordered_map>> &intersetion);
+
+// static V_ID ParentNode(V_ID first, V_ID second, TreeNode *&tree);
+
+// static void Initial(Graph *data_graph, Graph *query_graph, ui &candidates, ui
+// *&candidates_count); static V_ID InitialStartVertex(Graph *data_graph, Graph *query_graph);
+
+void ZFComputeNLF(Graph *data_graph, Graph *query_graph, V_ID i, ui &count, ui *tmp = nullptr);
+
+#endif  // SUBGRAPH_H
diff --git a/src/graph/executor/subgraph_provenance/trees.h b/src/graph/executor/subgraph_provenance/trees.h
new file mode 100644
index 00000000000..978a994a338
--- /dev/null
+++ b/src/graph/executor/subgraph_provenance/trees.h
@@ -0,0 +1,88 @@
+// Copyright [2022]
+#ifndef CECI_TYPES_H
+#define CECI_TYPES_H
+
+#include  // NOTE(review): the header name is missing from this text; restore it
+#include  // NOTE(review): the header name is missing from this text; restore it
+
+using ui = unsigned int;  // unsigned integer used for ids, counts and array sizes
+
+using V_ID = ui;  // vertex id
+using L_ID = ui;  // label id
+
+class TreeNode {  // per-vertex tree record; CECIFunction indexes it as tree[u]
+ public:
+  V_ID id;      // 0 after construction
+  V_ID parent;  // 99999 after construction (see the constructor)
+  ui level;
+  // ui under_level_count;
+  ui children_count;  // CECIFunction tests tree[u].children_count == 0 for its start vertex
+  // V_ID* under_level;
+  V_ID* children;  // owned array, allocated by initialize()
+
+  // This field tracks the nodes that should participate in the intersection to generate the
+  // candidates for node "id". A.k.a, the nodes that are visited earlier than node "id", and should
+  // be intersected to generate the candidates for "node id". Note, this field excludes the "parent
+  // node" of node "id".
+  ui bn_count;
+  V_ID* bn;  // owned array, allocated by initialize()
+
+  ui fn_count;
+  V_ID* fn;  // owned array, allocated by initialize(); presumably the forward counterpart of bn
+  size_t estimated_embeddings_num;
+
+ public:
+  TreeNode() {  // null arrays, zero counts; nothing is allocated until initialize()
+    id = 0;
+    // under_level = nullptr;
+    bn = nullptr;
+    fn = nullptr;
+    children = nullptr;
+    parent = 99999;  // presumably a "no parent" sentinel; it equals a valid V_ID (TODO confirm)
+    level = 0;
+    // under_level_count = 0;
+    children_count = 0;
+    bn_count = 0;
+    fn_count = 0;
+    estimated_embeddings_num = 0;
+  }
+
+  ~TreeNode() {  // NOTE(review): no copy operations are defined, so a copy would double-delete
+    // delete[] under_level;
+    delete[] bn;  // delete[] on nullptr is a no-op, so a never-initialized node is safe
+    delete[] fn;
+    delete[] children;
+  }
+
+  void initialize(const ui size) {  // allocates the three arrays with `size` entries each
+    // under_level = new V_ID[size];
+    bn = new V_ID[size];  // NOTE(review): old arrays are not freed; a second call leaks them
+    fn = new V_ID[size];
+    children = new V_ID[size];  // contents are uninitialized; the *_count fields stay unchanged
+  }
+};
+
+class Edges {  // owns two ui arrays plus size fields; nothing in this header fills them
+ public:
+  ui* offset;  // released with delete[] in the destructor
+  ui* edge;    // released with delete[] in the destructor
+  ui v_count;  // presumably the vertex count (TODO confirm against the code that fills it)
+  ui e_count;  // presumably the edge count (TODO confirm)
+  ui max_degree;
+
+ public:
+  Edges() {  // null arrays and zero counts
+    offset = nullptr;
+    edge = nullptr;
+    v_count = 0;
+    e_count = 0;
+    max_degree = 0;
+  }
+
+  ~Edges() {  // NOTE(review): no copy operations are defined, so a copy would double-delete
+    delete[] offset;
+    delete[] edge;
+  }
+};
+
+#endif  // CECI_TYPES_H
diff --git a/src/graph/planner/plan/Algo.h b/src/graph/planner/plan/Algo.h
index b684129ca4b..9429ecbff69 100644
--- a/src/graph/planner/plan/Algo.h
+++ b/src/graph/planner/plan/Algo.h
@@ -339,6 +339,18 @@ class Isomor final : public SingleInputNode {
     return qctx->objPool()->makeAndAdd(
         qctx, input, dScanVOut, qScanVOut, dScanEOut, qScanEOut);
   }
+  const std::string& getdScanVOut() const {  // read-only access to dScanVOut_
+    return dScanVOut_;
+  }
+  const std::string& getqScanVOut() const {  // read-only access to qScanVOut_
+    return qScanVOut_;
+  }
+  const std::string& getdScanEOut() const {  // read-only access to dScanEOut_
+    return dScanEOut_;
+  }
+  const std::string& getqScanEOut() const {  // read-only access to qScanEOut_
+    return qScanEOut_;
+  }
 
  private:
   friend ObjectPool;