From 87265b846ec52bcfc394616f983aa62cbe4c90a8 Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Thu, 1 Aug 2024 16:13:07 -0700 Subject: [PATCH 01/19] WiP commit --- src/DescriptorsCommand.cc | 175 +++++++++++++++++---------- src/DescriptorsCommand.h | 10 +- src/QueryHandlerPMGD.cc | 20 ++- utils/src/api_schema/api_schema.json | 8 +- 4 files changed, 139 insertions(+), 74 deletions(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index df60d6de..bdf8cb8f 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -84,8 +84,10 @@ std::string DescriptorsCommand::get_set_path(PMGDQuery &query_tx, list_arr.append(VDMS_DESC_SET_DIM_PROP); list_arr.append(VDMS_DESC_SET_ENGIN_PROP); + results["list"] = list_arr; + bool unique = true; // Query set node @@ -94,7 +96,6 @@ std::string DescriptorsCommand::get_set_path(PMGDQuery &query_tx, true); Json::Value &query_responses = query.run(); - if (query_responses.size() != 1 && query_responses[0].size() != 1) { throw ExceptionCommand(DescriptorSetError, "PMGD Transaction Error"); } @@ -113,7 +114,6 @@ std::string DescriptorsCommand::get_set_path(PMGDQuery &query_tx, _desc_set_locator[set_name] = set_path; return set_path; } - return ""; } @@ -409,94 +409,137 @@ void AddDescriptor::retrieve_aws_descriptorSet(const std::string &set_path) { } } -int AddDescriptor::construct_protobuf(PMGDQuery &query, - const Json::Value &jsoncmd, - const std::string &blob, int grp_id, - Json::Value &error) { - const Json::Value &cmd = jsoncmd[_cmd_name]; +int AddDescriptor::add_single_descriptor(PMGDQuery &query, + const Json::Value &jsoncmd, + const std::string &blob, int grp_id, + Json::Value &error){ - const std::string set_name = cmd["set"].asString(); + const Json::Value &cmd = jsoncmd[_cmd_name]; + const std::string set_name = cmd["set"].asString(); - Json::Value props = get_value(cmd, "properties"); + Json::Value props = get_value(cmd, "properties"); - std::string label = get_value(cmd, "label", "None"); - props[VDMS_DESC_LABEL_PROP] = label; + std::string label = get_value(cmd, "label", "None"); + props[VDMS_DESC_LABEL_PROP] = label; - int dimensions; - const std::string set_path = get_set_path(query, set_name, dimensions); + int dimensions; + const std::string set_path = get_set_path(query, set_name, dimensions); - if (set_path.empty()) { - error["info"] = "Set " + set_name + " not found"; - error["status"] = RSCommand::Error; - return -1; - } + if (set_path.empty()) { + error["info"] = "Set " + set_name + " not found"; + error["status"] = RSCommand::Error; + return -1; + } - // retrieve the descriptor set from AWS here - // operations are currently done in memory with no subsequent write to disk - // so there's no need to re-upload to AWS - if (_use_aws_storage) { - retrieve_aws_descriptorSet(set_path); - } + // retrieve the descriptor set from AWS here + // operations are currently done in memory with no subsequent write to disk + // so there's no need to re-upload to AWS + if (_use_aws_storage) { + retrieve_aws_descriptorSet(set_path); + } - long id = insert_descriptor(blob, set_path, dimensions, label, error); + //TODO modify descriptor + long id = insert_descriptor(blob, set_path, dimensions, label, error); - if (id < 0) { - error["status"] = RSCommand::Error; + if (id < 0) { + error["status"] = RSCommand::Error; - if (_use_aws_storage) { - // delete files in set_path - std::uintmax_t n = fs::remove_all(set_path); - std::cout << "Deleted " << n << " files or directories\n"; + if (_use_aws_storage) { + // delete files in set_path + std::uintmax_t n = fs::remove_all(set_path); + std::cout << "Deleted " << n << " files or directories\n"; + } + + return -1; } - return -1; - } + props[VDMS_DESC_ID_PROP] = Json::Int64(id); - props[VDMS_DESC_ID_PROP] = Json::Int64(id); + int node_ref = get_value(cmd, "_ref", query.get_available_reference()); - int node_ref = get_value(cmd, "_ref", query.get_available_reference()); + query.AddNode(node_ref, VDMS_DESC_TAG, props, Json::nullValue); - query.AddNode(node_ref, VDMS_DESC_TAG, props, Json::nullValue); + // It passed the checker, so it exists. + int set_ref = query.get_available_reference(); - // It passed the checker, so it exists. - int set_ref = query.get_available_reference(); + Json::Value link; + Json::Value results; + Json::Value list_arr; + list_arr.append(VDMS_DESC_SET_PATH_PROP); + list_arr.append(VDMS_DESC_SET_DIM_PROP); + results["list"] = list_arr; - Json::Value link; - Json::Value results; - Json::Value list_arr; - list_arr.append(VDMS_DESC_SET_PATH_PROP); - list_arr.append(VDMS_DESC_SET_DIM_PROP); - results["list"] = list_arr; + Json::Value constraints; + Json::Value name_arr; + name_arr.append("=="); + name_arr.append(set_name); + constraints[VDMS_DESC_SET_NAME_PROP] = name_arr; - Json::Value constraints; - Json::Value name_arr; - name_arr.append("=="); - name_arr.append(set_name); - constraints[VDMS_DESC_SET_NAME_PROP] = name_arr; + bool unique = true; - bool unique = true; + // Query set node + query.QueryNode(set_ref, VDMS_DESC_SET_TAG, link, constraints, results, + unique); - // Query set node - query.QueryNode(set_ref, VDMS_DESC_SET_TAG, link, constraints, results, - unique); + if (cmd.isMember("link")) { + add_link(query, cmd["link"], node_ref, VDMS_DESC_EDGE_TAG); + } + + Json::Value props_edge; + query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); + + // TODO: deleting files here causes problems with concurrency (TestRetail.py) + // keeping local copies as a temporary solution + // if(_use_aws_storage) + // { + // //delete files in set_path + // std::uintmax_t n = fs::remove_all(set_path); + // std::cout << "Deleted " << n << " files or directories\n"; + // } + + return 0; + +} + +int AddDescriptor::add_descriptor_batch(PMGDQuery &query, + const Json::Value &jsoncmd, + const std::string &blob, int grp_id, + Json::Value &error){ + +} + +int AddDescriptor::construct_protobuf(PMGDQuery &query, + const Json::Value &jsoncmd, + const std::string &blob, int grp_id, + Json::Value &error) { + + bool batch_mode; + int rc; + const Json::Value &cmd = jsoncmd[_cmd_name]; + const std::string set_name = cmd["set"].asString(); + + + Json::Value prop_list = get_value(cmd, "propertieslist"); + if(prop_list.size() == 0){ + //todo check for _ref usage + batch_mode = false; + rc = add_single_descriptor(query, jsoncmd, blob, grp_id, error); + + } else { + printf("batch mode not implemented\n"); + exit(0); - if (cmd.isMember("link")) { - add_link(query, cmd["link"], node_ref, VDMS_DESC_EDGE_TAG); } - Json::Value props_edge; - query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); + /* + printf("property list size: %d\n", prop_list.size()); + for(Json::Value::ArrayIndex i = 0; i < prop_list.size(); i++){ + Json::Value prop_dict = prop_list[i]; + std::cout< _cache_map; static tbb::concurrent_unordered_map - _desc_set_locator; + _desc_set_locator; static tbb::concurrent_unordered_map _desc_set_dims; // Will return the path to the set and the dimensions @@ -132,6 +132,14 @@ class AddDescriptor : public DescriptorsCommand { void retrieve_aws_descriptorSet(const std::string &set_path); + int add_single_descriptor(PMGDQuery &tx, const Json::Value &root, + const std::string &blob, int grp_id, + Json::Value &error); + + int add_descriptor_batch(PMGDQuery &tx, const Json::Value &root, + const std::string &blob, int grp_id, + Json::Value &error); + public: AddDescriptor(); diff --git a/src/QueryHandlerPMGD.cc b/src/QueryHandlerPMGD.cc index 66a1cfb6..33f97e12 100644 --- a/src/QueryHandlerPMGD.cc +++ b/src/QueryHandlerPMGD.cc @@ -63,8 +63,7 @@ std::unordered_map QueryHandlerPMGD::_rs_cmds; // DescriptorCommand.h tbb::concurrent_unordered_map DescriptorsCommand::_desc_set_locator; -tbb::concurrent_unordered_map - DescriptorsCommand::_desc_set_dims; +tbb::concurrent_unordered_map DescriptorsCommand::_desc_set_dims; void QueryHandlerPMGD::init() { DescriptorsManager::init(); @@ -131,6 +130,7 @@ QueryHandlerPMGD::QueryHandlerPMGD() bool QueryHandlerPMGD::syntax_checker(const Json::Value &root, Json::Value &error) { + printf("Syntax Checker\n"); valijson::ValidationResults results; valijson::adapters::JsonCppAdapter user_query(root); if (!_validator.validate(*_schema, user_query, &results)) { @@ -199,7 +199,8 @@ bool QueryHandlerPMGD::syntax_checker(const Json::Value &root, int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, Json::Value &root) { - Json::Reader reader; + printf("Parse commands Checker\n"); + Json::Reader reader; const std::string commands = proto_query.json(); try { @@ -244,12 +245,12 @@ int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, root["status"] = RSCommand::Error; return -1; } - + printf("Parse COmmands complete\n"); return 0; } void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, - protobufs::queryMessage &proto_res) { + protobufs::queryMessage &proto_res) { //TODO Investigate why/where json throwing Json::FastWriter fastWriter; Json::Value root; @@ -297,18 +298,20 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, Json::StyledWriter w; std::cerr << w.write(json_responses); }; - + printf("Parse Command Pre-\n"); if (parse_commands(proto_query, root) != 0) { cmd_current = "Transaction"; error(root, cmd_current); return; } + printf("Parse Commands Completed, back in proc-query\n"); PMGDQuery pmgd_query(_pmgd_qh); int blob_count = 0; // iterate over the list of the queries for (int j = 0; j < root.size(); j++) { + printf("Iterating over incoming queries: %d...\n", j); const Json::Value &query = root[j]; std::string cmd = query.getMemberNames()[0]; @@ -342,9 +345,11 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, construct_results.push_back(cmd_result); } + printf("Running PMGD Query\n"); timers.add_timestamp("pmgd_query_time"); Json::Value &tx_responses = pmgd_query.run(_autodelete_init); timers.add_timestamp("pmgd_query_time"); + printf("PMGD query Complete\n"); if (!tx_responses.isArray() || tx_responses.size() != root.size()) { Json::StyledWriter writer; @@ -365,6 +370,7 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, } else { blob_count = 0; for (int j = 0; j < root.size(); j++) { + printf("Response iteration: %d\n", j); Json::Value &query = root[j]; std::string cmd = query.getMemberNames()[0]; @@ -400,8 +406,10 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, if (output_query_level_timing) { timers.print_map_runtimes(); } + printf("Writing JSON responses...\n"); proto_res.set_json(fastWriter.write(json_responses)); _pmgd_qh.cleanup_files(); + printf("Cleaning up!\n"); } catch (VCL::Exception &e) { print_exception(e); diff --git a/utils/src/api_schema/api_schema.json b/utils/src/api_schema/api_schema.json index 9725c663..0057afbf 100644 --- a/utils/src/api_schema/api_schema.json +++ b/utils/src/api_schema/api_schema.json @@ -75,6 +75,11 @@ "definitions": { // misc + "propertyArray": { + "type": "array", + "items": {"type" : "object"}, + "minimum" : 1 + }, "positiveInt": { "type": "integer", @@ -744,7 +749,8 @@ "label": { "type": "string" }, "_ref": { "$ref": "#/definitions/refInt" }, "link": { "$ref": "#/definitions/blockLink" }, - "properties": { "type": "object" } + "properties": { "type": "object" }, + "propertieslist": {"$ref": "#/definitions/propertyArray"} }, "required": ["set"], "additionalProperties": false From 5b2f0a55e553b63541c98f54193e9898c4d2d37c Mon Sep 17 00:00:00 2001 From: Rohit Verma Date: Mon, 5 Aug 2024 03:09:06 -0700 Subject: [PATCH 02/19] udf_server filename update --- remote_function/udf_server.py | 12 +++--------- tests/remote_function_test/udf_server.py | 12 +++--------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/remote_function/udf_server.py b/remote_function/udf_server.py index af044a50..7dacd972 100644 --- a/remote_function/udf_server.py +++ b/remote_function/udf_server.py @@ -1,15 +1,12 @@ from flask import Flask, request, jsonify, send_file, after_this_request import cv2 -import numpy as np import json from datetime import datetime, timezone import os import sys -from collections import defaultdict, deque -import skvideo.io -import imutils import uuid from zipfile import ZipFile +from werkzeug.utils import secure_filename for entry in os.scandir("functions"): if entry.is_file(): @@ -18,9 +15,6 @@ app = Flask(__name__) -count = 0 - - def get_current_timestamp(): dt = datetime.now(timezone.utc) @@ -42,7 +36,7 @@ def image_api(): format = json_data["format"] if "format" in json_data else "jpg" - tmpfile = "tmpfile" + uuid.uuid1().hex + "." + str(format) + tmpfile = secure_filename("tmpfile" + uuid.uuid1().hex + "." + str(format)) image_data.save(tmpfile) @@ -70,7 +64,7 @@ def video_api(): video_data = request.files["videoData"] format = json_data["format"] if "format" in json_data else "mp4" - tmpfile = "tmpfile" + uuid.uuid1().hex + "." + str(format) + tmpfile = secure_filename("tmpfile" + uuid.uuid1().hex + "." + str(format)) video_data.save(tmpfile) video_file, metadata_file = "", "" diff --git a/tests/remote_function_test/udf_server.py b/tests/remote_function_test/udf_server.py index 7cb9526d..d197ddb9 100644 --- a/tests/remote_function_test/udf_server.py +++ b/tests/remote_function_test/udf_server.py @@ -1,15 +1,12 @@ from flask import Flask, request, jsonify, send_file, after_this_request import cv2 -import numpy as np import json from datetime import datetime, timezone import os import sys -from collections import defaultdict, deque -import skvideo.io -import imutils import uuid from zipfile import ZipFile +from werkzeug.utils import secure_filename for entry in os.scandir("functions"): if entry.is_file(): @@ -18,9 +15,6 @@ app = Flask(__name__) -count = 0 - - def get_current_timestamp(): dt = datetime.now(timezone.utc) @@ -42,7 +36,7 @@ def image_api(): format = json_data["format"] if "format" in json_data else "jpg" - tmpfile = "tmpfile" + uuid.uuid1().hex + "." + str(format) + tmpfile = secure_filename("tmpfile" + uuid.uuid1().hex + "." + str(format)) image_data.save(tmpfile) @@ -70,7 +64,7 @@ def video_api(): video_data = request.files["videoData"] format = json_data["format"] if "format" in json_data else "mp4" - tmpfile = "tmpfile" + uuid.uuid1().hex + "." + str(format) + tmpfile = secure_filename("tmpfile" + uuid.uuid1().hex + "." + str(format)) video_data.save(tmpfile) video_file, metadata_file = "", "" From b1e141ae63fb738e77fcff5e03dc7f2cfa3b1c3c Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Mon, 5 Aug 2024 13:47:48 -0700 Subject: [PATCH 03/19] Initial batch development compiles and superficially functioning, needs refinement and verification testing --- src/DescriptorsCommand.cc | 156 +++++++++++++++++++++++++++++++++----- 1 file changed, 135 insertions(+), 21 deletions(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index bdf8cb8f..363ba151 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -341,8 +341,9 @@ AddDescriptor::AddDescriptor() : DescriptorsCommand("AddDescriptor") { //_use_aws_storage = VDMSConfig::instance()->get_aws_flag(); } +//update to handle multiple descriptors at a go long AddDescriptor::insert_descriptor(const std::string &blob, - const std::string &set_path, int dim, + const std::string &set_path, int nr_desc, const std::string &label, Json::Value &error) { long id_first; @@ -351,21 +352,23 @@ long AddDescriptor::insert_descriptor(const std::string &blob, VCL::DescriptorSet *desc_set = _dm->get_descriptors_handler(set_path); - if (blob.length() / 4 != dim) { + //TODO this check no longer applies, should move it elsewhere + /*if (blob.length() / 4 != dim) { std::cerr << "AddDescriptor::insert_descriptor: "; std::cerr << "Dimensions mismatch: "; std::cerr << blob.length() / 4 << " " << dim << std::endl; error["info"] = "Blob Dimensions Mismatch"; return -1; - } + }*/ + if (!label.empty()) { long label_id = desc_set->get_label_id(label); long *label_ptr = &label_id; - id_first = desc_set->add((float *)blob.data(), 1, label_ptr); + id_first = desc_set->add((float *)blob.data(), nr_desc, label_ptr); } else { - id_first = desc_set->add((float *)blob.data(), 1); + id_first = desc_set->add((float *)blob.data(), nr_desc); } if (output_vcl_timing) { @@ -422,8 +425,8 @@ int AddDescriptor::add_single_descriptor(PMGDQuery &query, std::string label = get_value(cmd, "label", "None"); props[VDMS_DESC_LABEL_PROP] = label; - int dimensions; - const std::string set_path = get_set_path(query, set_name, dimensions); + int dim; + const std::string set_path = get_set_path(query, set_name, dim); if (set_path.empty()) { error["info"] = "Set " + set_name + " not found"; @@ -431,6 +434,14 @@ int AddDescriptor::add_single_descriptor(PMGDQuery &query, return -1; } + if (blob.length() / 4 != dim) { + std::cerr << "AddDescriptor::insert_descriptor: "; + std::cerr << "Dimensions mismatch: "; + std::cerr << blob.length() / 4 << " " << dim << std::endl; + error["info"] = "Blob Dimensions Mismatch"; + return -1; + } + // retrieve the descriptor set from AWS here // operations are currently done in memory with no subsequent write to disk // so there's no need to re-upload to AWS @@ -438,8 +449,8 @@ int AddDescriptor::add_single_descriptor(PMGDQuery &query, retrieve_aws_descriptorSet(set_path); } - //TODO modify descriptor - long id = insert_descriptor(blob, set_path, dimensions, label, error); + //TODO modify insert descriptor to handle batches + long id = insert_descriptor(blob, set_path, 1, label, error); if (id < 0) { error["status"] = RSCommand::Error; @@ -506,6 +517,117 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, const std::string &blob, int grp_id, Json::Value &error){ + int expected_blb_size; + int nr_expected_descs; + int dimensions; + + //Extract set name + const Json::Value &cmd = jsoncmd[_cmd_name]; + const std::string set_name = cmd["set"].asString(); + + //extract properties list and get filepath/object location of set + Json::Value prop_list = get_value(cmd, "propertieslist"); + const std::string set_path = get_set_path(query, set_name, dimensions); + + if (set_path.empty()) { + error["info"] = "Set " + set_name + " not found"; + error["status"] = RSCommand::Error; + return -1; + } + + std::string label = get_value(cmd, "label", "None"); + + // retrieve the descriptor set from AWS here + // operations are currently done in memory with no subsequent write to disk + // so there's no need to re-upload to AWS + if (_use_aws_storage) { + retrieve_aws_descriptorSet(set_path); + } + + // Note dimensionse are based on a 32 bit integer, hence the /4 math on size + // as the string blob is sized in 8 bit ints. + nr_expected_descs = prop_list.size(); + expected_blb_size = nr_expected_descs * dimensions * 4; + + //Verify length of input is matching expectations + if (blob.length() != expected_blb_size) { + std::cerr << "AddDescriptor::insert_descriptor: "; + std::cerr << "Expectected Blob Length Does Not Match Input "; + std::cerr << blob.length() << " != " << expected_blb_size << std::endl; + error["info"] = "FV Input Length Mismatch"; + return -1; + } + + //TODO modify insert descriptor to handle batches + long id = insert_descriptor(blob, set_path, nr_expected_descs, label, error); + + if (id < 0) { + error["status"] = RSCommand::Error; + + if (_use_aws_storage) { + // delete files in set_path + std::uintmax_t n = fs::remove_all(set_path); + std::cout << "Deleted " << n << " files or directories\n"; + } + + return -1; + } + + //get reference tag for source node for ID + + + // Loop over properties list, add relevant query, link, and edges for each + for(int i=0; i < nr_expected_descs; i++) { + int node_ref = query.get_available_reference(); + Json::Value cur_props; + cur_props = prop_list[i]; + //TODO Note using iterator to modify ID return, we're gonna want to watch this closely. + cur_props[VDMS_DESC_ID_PROP] = Json::Int64(id+i); + + + query.AddNode(node_ref, VDMS_DESC_TAG, cur_props, Json::nullValue); + + // It passed the checker, so it exists. + int set_ref = query.get_available_reference(); + + Json::Value link; + Json::Value results; + Json::Value list_arr; + list_arr.append(VDMS_DESC_SET_PATH_PROP); + list_arr.append(VDMS_DESC_SET_DIM_PROP); + results["list"] = list_arr; + + //constraints for getting set node to link to. + Json::Value constraints; + Json::Value name_arr; + name_arr.append("=="); + name_arr.append(set_name); + constraints[VDMS_DESC_SET_NAME_PROP] = name_arr; + + bool unique = true; + + // Query set node-We only need to do this once, outside of the loop TODO MOVE + query.QueryNode(set_ref, VDMS_DESC_SET_TAG, link, constraints, results, + unique); + + //note this implicitly means that every node of a batch uses the same link + if (cmd.isMember("link")) { + add_link(query, cmd["link"], node_ref, VDMS_DESC_EDGE_TAG); + } + + Json::Value props_edge; + query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); + } + + + /* TODO example iteration over properties list + * TODO update API to call field "batch_properties" + printf("property list size: %d\n", prop_list.size()); + for(Json::Value::ArrayIndex i = 0; i < prop_list.size(); i++){ + Json::Value prop_dict = prop_list[i]; + std::cout<(cmd, "propertieslist"); if(prop_list.size() == 0){ - //todo check for _ref usage - batch_mode = false; + printf("Adding Single Descriptor\n"); rc = add_single_descriptor(query, jsoncmd, blob, grp_id, error); - } else { - printf("batch mode not implemented\n"); - exit(0); - + printf("Adding Descriptor Batch\n"); + rc = add_descriptor_batch(query, jsoncmd, blob, grp_id, error); } - /* - printf("property list size: %d\n", prop_list.size()); - for(Json::Value::ArrayIndex i = 0; i < prop_list.size(); i++){ - Json::Value prop_dict = prop_list[i]; - std::cout< Date: Wed, 7 Aug 2024 22:44:33 -0700 Subject: [PATCH 04/19] vdms client fix --- client/python/vdms/vdms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/client/python/vdms/vdms.py b/client/python/vdms/vdms.py index 59c72595..7ca21a41 100644 --- a/client/python/vdms/vdms.py +++ b/client/python/vdms/vdms.py @@ -85,6 +85,7 @@ def connect(self, host="localhost", port=55555): if self.use_tls: context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + context.minimum_version = ssl.TLSVersion.TLSv1_2 if self.ca_file != "": context.load_verify_locations(cafile=self.ca_file) if self.cert_file != "" and self.key_file != "": From 08b38702428a1325ef6f980e659bad8291bb3e95 Mon Sep 17 00:00:00 2001 From: sys_vdms Date: Thu, 8 Aug 2024 07:51:36 +0000 Subject: [PATCH 05/19] Automated updates: Format and/or coverage --- .github/coverage/python.develop.coverage_report.txt | 4 ++-- .github/coverage/python.develop.coverage_value.txt | 2 +- remote_function/udf_server.py | 1 + tests/remote_function_test/udf_server.py | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/coverage/python.develop.coverage_report.txt b/.github/coverage/python.develop.coverage_report.txt index eddfefcb..9f0fd59c 100644 --- a/.github/coverage/python.develop.coverage_report.txt +++ b/.github/coverage/python.develop.coverage_report.txt @@ -1,6 +1,6 @@ Name Stmts Miss Cover Missing -------------------------------------------------------------------- /vdms/client/python/vdms/__init__.py 2 0 100% -/vdms/client/python/vdms/vdms.py 98 2 98% 151, 166 +/vdms/client/python/vdms/vdms.py 99 2 98% 152, 167 -------------------------------------------------------------------- -TOTAL 100 2 98% +TOTAL 101 2 98% diff --git a/.github/coverage/python.develop.coverage_value.txt b/.github/coverage/python.develop.coverage_value.txt index 6529ff88..e2b6d0f9 100644 --- a/.github/coverage/python.develop.coverage_value.txt +++ b/.github/coverage/python.develop.coverage_value.txt @@ -1 +1 @@ -98 +98.02 diff --git a/remote_function/udf_server.py b/remote_function/udf_server.py index 7dacd972..17a5caca 100644 --- a/remote_function/udf_server.py +++ b/remote_function/udf_server.py @@ -15,6 +15,7 @@ app = Flask(__name__) + def get_current_timestamp(): dt = datetime.now(timezone.utc) diff --git a/tests/remote_function_test/udf_server.py b/tests/remote_function_test/udf_server.py index d197ddb9..939034ae 100644 --- a/tests/remote_function_test/udf_server.py +++ b/tests/remote_function_test/udf_server.py @@ -15,6 +15,7 @@ app = Flask(__name__) + def get_current_timestamp(): dt = datetime.now(timezone.utc) From 89717c481a6d7e72f6822ba291389553c6cf94d3 Mon Sep 17 00:00:00 2001 From: sys_vdms Date: Thu, 8 Aug 2024 09:13:52 +0000 Subject: [PATCH 06/19] Automated updates: Format and/or coverage --- .github/coverage/cpp.develop.coverage_report.txt | 4 ++-- .github/coverage/cpp.develop.coverage_value.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/coverage/cpp.develop.coverage_report.txt b/.github/coverage/cpp.develop.coverage_report.txt index 672a7da5..4b669fb3 100644 --- a/.github/coverage/cpp.develop.coverage_report.txt +++ b/.github/coverage/cpp.develop.coverage_report.txt @@ -4,7 +4,7 @@ Directory: .. ------------------------------------------------------------------------------ File Lines Exec Cover Missing ------------------------------------------------------------------------------ -client/cpp/CSVParserUtil.cpp 345 291 84% 37-45,48,50,239,241,264-265,269-270,286,292,304,313-314,317,323,331-332,335,345,351,363,368,373,379-387,389,425,435-437,474-476,478,503-506 +client/cpp/CSVParserUtil.cpp 345 290 84% 37-45,48,50,239,241,264-265,269-270,286,292,304,313-314,317,323,331-332,335,345,351,363,368,373,379-387,389,425,435-437,450,474-476,478,503-506 client/cpp/VDMSClient.cc 20 20 100% src/AutoDeleteNode.cc 9 8 88% 40 src/BackendNeo4j.cc 121 0 0% 4,6-17,20,24,29-41,46-47,52,55-58,61-62,64-70,73,78,82-83,85-86,89,92,95-96,98,102,104,106-109,111,114-116,118,122,131-132,138,140,142-144,147,150-152,155-159,161-175,178,182,184,186,195,197-200,204-205,207-208,211-215,220,224-226,228 @@ -57,5 +57,5 @@ utils/src/comm/Exception.cc 6 0 0% 35-40 utils/src/stats/SystemStats.cc 250 249 99% 453 utils/src/timers/TimerMap.cc 82 75 91% 126,151,153,155-158 ------------------------------------------------------------------------------ -TOTAL 10076 6488 64% +TOTAL 10076 6487 64% ------------------------------------------------------------------------------ diff --git a/.github/coverage/cpp.develop.coverage_value.txt b/.github/coverage/cpp.develop.coverage_value.txt index 7136165a..4778368d 100644 --- a/.github/coverage/cpp.develop.coverage_value.txt +++ b/.github/coverage/cpp.develop.coverage_value.txt @@ -1 +1 @@ -64.3906 +64.3807 From 28d9c79785654286f85cefd71b015e59d227d8ff Mon Sep 17 00:00:00 2001 From: sys_vdms Date: Thu, 8 Aug 2024 10:34:21 +0000 Subject: [PATCH 07/19] Automated updates: Format and/or coverage --- .github/coverage/cpp.develop.coverage_report.txt | 6 +++--- .github/coverage/cpp.develop.coverage_value.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/coverage/cpp.develop.coverage_report.txt b/.github/coverage/cpp.develop.coverage_report.txt index 4b669fb3..9c077a25 100644 --- a/.github/coverage/cpp.develop.coverage_report.txt +++ b/.github/coverage/cpp.develop.coverage_report.txt @@ -4,7 +4,7 @@ Directory: .. ------------------------------------------------------------------------------ File Lines Exec Cover Missing ------------------------------------------------------------------------------ -client/cpp/CSVParserUtil.cpp 345 290 84% 37-45,48,50,239,241,264-265,269-270,286,292,304,313-314,317,323,331-332,335,345,351,363,368,373,379-387,389,425,435-437,450,474-476,478,503-506 +client/cpp/CSVParserUtil.cpp 345 291 84% 37-45,48,50,239,241,264-265,269-270,286,292,304,313-314,317,323,331-332,335,345,351,363,368,373,379-387,389,425,435-437,474-476,478,503-506 client/cpp/VDMSClient.cc 20 20 100% src/AutoDeleteNode.cc 9 8 88% 40 src/BackendNeo4j.cc 121 0 0% 4,6-17,20,24,29-41,46-47,52,55-58,61-62,64-70,73,78,82-83,85-86,89,92,95-96,98,102,104,106-109,111,114-116,118,122,131-132,138,140,142-144,147,150-152,155-159,161-175,178,182,184,186,195,197-200,204-205,207-208,211-215,220,224-226,228 @@ -51,11 +51,11 @@ src/VDMSConfig.cc 181 168 92% 119-121,196,19 src/VideoCommand.cc 474 117 24% 50,53-54,56-58,60,62,65-66,68-69,72,74-76,78-80,82,84-87,89-90,92-93,95,97-99,102,109,111,116,121-124,130,132,158-161,167-168,170,181,184,201,213,217-220,227-229,231-233,239,241-247,249-250,253-255,257-259,261-262,264,266-278,280-282,284-285,296,300,325,329,331,333,335,337,340-341,343,346,350,352,357-358,380-381,383-384,387-392,394,396,398-399,405,407,429-431,436,442-445,449-454,456-463,467-473,475,480-485,488,490-491,494-496,504,509,527-532,535-539,555,558,560-562,565-567,569-570,572-576,579-580,583-585,587,589-591,594-597,601-606,611-612,614-615,617-621,624-626,628,630-632,634-637,640-641,644,646,651,664,666-673,677,680,683,688-689,691-695,698-699,701,703,705,708,712,714,716-719,721-723,726,728,730,732-733,735-736,740,745,748-749,751-753,755,757,759-761,763-764,767-769,773-776,780-786,790-794,798,801,803,805,807,809-813,817-821,824-825,827-830,833-836,841-842,846-851,855-856,859-860 src/VideoLoop.cc 249 200 80% 33,81,98-101,103-109,180,188,197,201,207,211,217,220,290,312,315,324-325,327,331-332,334-335,339-342,344,346-349,351-354,356-357,359,361,370 utils/src/comm/ConnClient.cc 69 57 82% 49,55,59-60,98,103,108,114,120,127,130,149 -utils/src/comm/Connection.cc 82 61 74% 48-53,75,77-79,84,86,97,111,135,140,153,157,159,168,172 +utils/src/comm/Connection.cc 82 62 75% 48-53,75,77-79,84,86,97,111,135,140,153,157,159,168 utils/src/comm/ConnServer.cc 61 49 80% 60,64,68,75,84,91,103,108,128,135,140,145 utils/src/comm/Exception.cc 6 0 0% 35-40 utils/src/stats/SystemStats.cc 250 249 99% 453 utils/src/timers/TimerMap.cc 82 75 91% 126,151,153,155-158 ------------------------------------------------------------------------------ -TOTAL 10076 6487 64% +TOTAL 10076 6489 64% ------------------------------------------------------------------------------ diff --git a/.github/coverage/cpp.develop.coverage_value.txt b/.github/coverage/cpp.develop.coverage_value.txt index 4778368d..e99eee9f 100644 --- a/.github/coverage/cpp.develop.coverage_value.txt +++ b/.github/coverage/cpp.develop.coverage_value.txt @@ -1 +1 @@ -64.3807 +64.4006 From 9c5a1b1156ff9b5623d87674da7a08333ea0648b Mon Sep 17 00:00:00 2001 From: sys_vdms Date: Thu, 8 Aug 2024 13:10:31 +0000 Subject: [PATCH 08/19] Automated updates: Format and/or coverage --- .github/coverage/cpp.develop.coverage_report.txt | 4 ++-- .github/coverage/cpp.develop.coverage_value.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/coverage/cpp.develop.coverage_report.txt b/.github/coverage/cpp.develop.coverage_report.txt index 9c077a25..672a7da5 100644 --- a/.github/coverage/cpp.develop.coverage_report.txt +++ b/.github/coverage/cpp.develop.coverage_report.txt @@ -51,11 +51,11 @@ src/VDMSConfig.cc 181 168 92% 119-121,196,19 src/VideoCommand.cc 474 117 24% 50,53-54,56-58,60,62,65-66,68-69,72,74-76,78-80,82,84-87,89-90,92-93,95,97-99,102,109,111,116,121-124,130,132,158-161,167-168,170,181,184,201,213,217-220,227-229,231-233,239,241-247,249-250,253-255,257-259,261-262,264,266-278,280-282,284-285,296,300,325,329,331,333,335,337,340-341,343,346,350,352,357-358,380-381,383-384,387-392,394,396,398-399,405,407,429-431,436,442-445,449-454,456-463,467-473,475,480-485,488,490-491,494-496,504,509,527-532,535-539,555,558,560-562,565-567,569-570,572-576,579-580,583-585,587,589-591,594-597,601-606,611-612,614-615,617-621,624-626,628,630-632,634-637,640-641,644,646,651,664,666-673,677,680,683,688-689,691-695,698-699,701,703,705,708,712,714,716-719,721-723,726,728,730,732-733,735-736,740,745,748-749,751-753,755,757,759-761,763-764,767-769,773-776,780-786,790-794,798,801,803,805,807,809-813,817-821,824-825,827-830,833-836,841-842,846-851,855-856,859-860 src/VideoLoop.cc 249 200 80% 33,81,98-101,103-109,180,188,197,201,207,211,217,220,290,312,315,324-325,327,331-332,334-335,339-342,344,346-349,351-354,356-357,359,361,370 utils/src/comm/ConnClient.cc 69 57 82% 49,55,59-60,98,103,108,114,120,127,130,149 -utils/src/comm/Connection.cc 82 62 75% 48-53,75,77-79,84,86,97,111,135,140,153,157,159,168 +utils/src/comm/Connection.cc 82 61 74% 48-53,75,77-79,84,86,97,111,135,140,153,157,159,168,172 utils/src/comm/ConnServer.cc 61 49 80% 60,64,68,75,84,91,103,108,128,135,140,145 utils/src/comm/Exception.cc 6 0 0% 35-40 utils/src/stats/SystemStats.cc 250 249 99% 453 utils/src/timers/TimerMap.cc 82 75 91% 126,151,153,155-158 ------------------------------------------------------------------------------ -TOTAL 10076 6489 64% +TOTAL 10076 6488 64% ------------------------------------------------------------------------------ diff --git a/.github/coverage/cpp.develop.coverage_value.txt b/.github/coverage/cpp.develop.coverage_value.txt index e99eee9f..7136165a 100644 --- a/.github/coverage/cpp.develop.coverage_value.txt +++ b/.github/coverage/cpp.develop.coverage_value.txt @@ -1 +1 @@ -64.4006 +64.3906 From 6a9dc71eb00da9201ee84e96de90feac2c3987ed Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Thu, 15 Aug 2024 13:23:35 -0700 Subject: [PATCH 09/19] batch inserts working --- src/DescriptorsCommand.cc | 10 ++- tests/python/TestDescriptors.py | 113 ++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 3 deletions(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index 363ba151..a15ced85 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -574,7 +574,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, } //get reference tag for source node for ID - + printf("Base ID for insertion: %d\n", id); // Loop over properties list, add relevant query, link, and edges for each for(int i=0; i < nr_expected_descs; i++) { @@ -650,6 +650,7 @@ int AddDescriptor::construct_protobuf(PMGDQuery &query, rc = add_descriptor_batch(query, jsoncmd, blob, grp_id, error); } + if(rc < 0) error["status"] = RSCommand::Error; return rc; @@ -849,7 +850,7 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, // Case (1) if (cmd.isMember("link")) { - + printf("Link Case for FindDesc\n"); // Query for the Descriptors related to user-defined link // that match the user-defined constraints // We will need to do the AND operation @@ -863,13 +864,15 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, Json::Value link_to_desc; link_to_desc["ref"] = desc_ref; - // Query for the set + // Query for the set RESET TO UNIQUE FOR BOOLEAN FALSE query.QueryNode(-1, VDMS_DESC_SET_TAG, link_to_desc, constraints_set, results_set, unique); } // Case (2) else if (!cmd.isMember("k_neighbors")) { + printf("Regular Case for Find Desc\n"); + // In this case, we either need properties of the descriptor // ("list") on the results block, or we need the descriptor nodes // because the user defined a reference. @@ -892,6 +895,7 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, } // Case (3), Just want the descriptor by value, we only need the set else { + printf("KNN Case\n"); Json::Value link_null; // null const int k_neighbors = get_value(cmd, "k_neighbors", 0); diff --git a/tests/python/TestDescriptors.py b/tests/python/TestDescriptors.py index 0d6d4be2..7abe8030 100644 --- a/tests/python/TestDescriptors.py +++ b/tests/python/TestDescriptors.py @@ -206,6 +206,119 @@ def test_addSetAndDescriptorsDimMismatch(self): self.disconnect(db) + def test_AddSetAndWrongBatchSize(self): + + + db = self.create_connection() + + # Create and verify descriptor set + trans_list = [] + trans_dict = {} + desc_set = {} + desc_set["engine"] = "FaissFlat" + desc_set["metric"] = "L2" + desc_set["name"] = "wrongbatchsize" + desc_set["dimensions"] = 128 + trans_dict["AddDescriptorSet"] = desc_set + + trans_list.append(trans_dict) + + response, img_array = db.query(trans_list) + self.assertEqual(response[0]["AddDescriptorSet"]["status"],0) + + # Create and add a batch of feature vectors + trans = [] + blobs = [] + nr_dims = 128 + batch_size = 10 + desc_blob = [] + x = np.zeros(nr_dims * batch_size) + x = x.astype("float32") + desc_blob.append(x.tobytes()) + + properties_list=[] + for x in range(batch_size + 3): + props = {"batchprop": x} + properties_list.append(props) + + descriptor = {} + descriptor["set"] = "wrongbatchsize" + descriptor["propertieslist"] = properties_list + query = {} + query["AddDescriptor"] = descriptor + trans.append(query) + blobs.append(desc_blob) + + response, img_array = db.query(trans, blobs) + self.assertEqual(response[0]["info"], "FV Input Length Mismatch") + self.assertEqual(response[0]["status"], -1) + + self.disconnect(db) + + def test_AddSetAndInsertBatch(self): + + db = self.create_connection() + + # Create and verify descriptor set + trans_list = [] + trans_dict = {} + desc_set = {} + desc_set["engine"] = "FaissFlat" + desc_set["metric"] = "L2" + desc_set["name"] = "rightbatchsize" + desc_set["dimensions"] = 128 + trans_dict["AddDescriptorSet"] = desc_set + + trans_list.append(trans_dict) + + response, img_array = db.query(trans_list) + self.assertEqual(response[0]["AddDescriptorSet"]["status"],0) + + # Create and add a batch of feature vectors + trans = [] + blobs = [] + nr_dims = 128 + batch_size = 10 + desc_blob = [] + x = np.zeros(nr_dims * batch_size) + x = x.astype("float32") + desc_blob.append(x.tobytes()) + + properties_list=[] + for x in range(batch_size): + props = {"batchprop": x} + properties_list.append(props) + + descriptor = {} + descriptor["set"] = "rightbatchsize" + descriptor["propertieslist"] = properties_list + query = {} + query["AddDescriptor"] = descriptor + trans.append(query) + blobs.append(desc_blob) + + response, img_array = db.query(trans, blobs) + print(response) + self.assertEqual(response[0]["AddDescriptor"]["status"], 0) + + # now try to get those same descriptors back + desc_find = {} + desc_find["set"] = "rightbatchsize" + desc_find["results"] = {"list":["batchprop"]} + + query = {} + query["FindDescriptor"] = desc_find + + trans = [] + blobs = [] + trans.append(query) + response, img_array = db.query(trans, blobs) + print(response) + + self.disconnect(db) + + + def test_classifyDescriptor(self): db = self.create_connection() set_name = "features_128d_4_classify" From 753c9d2eb6a3af45fbb0fec00a8c5b345291aa44 Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Fri, 16 Aug 2024 10:46:06 -0700 Subject: [PATCH 10/19] cruft cleanup, renamed batch field to batch properties --- src/DescriptorsCommand.cc | 23 +++-------------------- tests/python/TestDescriptors.py | 7 +++---- utils/src/api_schema/api_schema.json | 2 +- 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index a15ced85..bbaec89e 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -526,7 +526,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, const std::string set_name = cmd["set"].asString(); //extract properties list and get filepath/object location of set - Json::Value prop_list = get_value(cmd, "propertieslist"); + Json::Value prop_list = get_value(cmd, "batch_properties"); const std::string set_path = get_set_path(query, set_name, dimensions); if (set_path.empty()) { @@ -574,8 +574,6 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, } //get reference tag for source node for ID - printf("Base ID for insertion: %d\n", id); - // Loop over properties list, add relevant query, link, and edges for each for(int i=0; i < nr_expected_descs; i++) { int node_ref = query.get_available_reference(); @@ -619,14 +617,6 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); } - - /* TODO example iteration over properties list - * TODO update API to call field "batch_properties" - printf("property list size: %d\n", prop_list.size()); - for(Json::Value::ArrayIndex i = 0; i < prop_list.size(); i++){ - Json::Value prop_dict = prop_list[i]; - std::cout<(cmd, "propertieslist"); + Json::Value prop_list = get_value(cmd, "batch_properties"); if(prop_list.size() == 0){ - printf("Adding Single Descriptor\n"); rc = add_single_descriptor(query, jsoncmd, blob, grp_id, error); } else { - printf("Adding Descriptor Batch\n"); rc = add_descriptor_batch(query, jsoncmd, blob, grp_id, error); } @@ -850,7 +838,6 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, // Case (1) if (cmd.isMember("link")) { - printf("Link Case for FindDesc\n"); // Query for the Descriptors related to user-defined link // that match the user-defined constraints // We will need to do the AND operation @@ -864,15 +851,12 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, Json::Value link_to_desc; link_to_desc["ref"] = desc_ref; - // Query for the set RESET TO UNIQUE FOR BOOLEAN FALSE + // Query for the set query.QueryNode(-1, VDMS_DESC_SET_TAG, link_to_desc, constraints_set, results_set, unique); } // Case (2) else if (!cmd.isMember("k_neighbors")) { - - printf("Regular Case for Find Desc\n"); - // In this case, we either need properties of the descriptor // ("list") on the results block, or we need the descriptor nodes // because the user defined a reference. @@ -895,7 +879,6 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, } // Case (3), Just want the descriptor by value, we only need the set else { - printf("KNN Case\n"); Json::Value link_null; // null const int k_neighbors = get_value(cmd, "k_neighbors", 0); diff --git a/tests/python/TestDescriptors.py b/tests/python/TestDescriptors.py index 7abe8030..cee23db9 100644 --- a/tests/python/TestDescriptors.py +++ b/tests/python/TestDescriptors.py @@ -243,7 +243,7 @@ def test_AddSetAndWrongBatchSize(self): descriptor = {} descriptor["set"] = "wrongbatchsize" - descriptor["propertieslist"] = properties_list + descriptor["batch_properties"] = properties_list query = {} query["AddDescriptor"] = descriptor trans.append(query) @@ -291,7 +291,7 @@ def test_AddSetAndInsertBatch(self): descriptor = {} descriptor["set"] = "rightbatchsize" - descriptor["propertieslist"] = properties_list + descriptor["batch_properties"] = properties_list query = {} query["AddDescriptor"] = descriptor trans.append(query) @@ -313,12 +313,11 @@ def test_AddSetAndInsertBatch(self): blobs = [] trans.append(query) response, img_array = db.query(trans, blobs) - print(response) + #print(response) self.disconnect(db) - def test_classifyDescriptor(self): db = self.create_connection() set_name = "features_128d_4_classify" diff --git a/utils/src/api_schema/api_schema.json b/utils/src/api_schema/api_schema.json index 0057afbf..5b337e78 100644 --- a/utils/src/api_schema/api_schema.json +++ b/utils/src/api_schema/api_schema.json @@ -750,7 +750,7 @@ "_ref": { "$ref": "#/definitions/refInt" }, "link": { "$ref": "#/definitions/blockLink" }, "properties": { "type": "object" }, - "propertieslist": {"$ref": "#/definitions/propertyArray"} + "batch_properties": {"$ref": "#/definitions/propertyArray"} }, "required": ["set"], "additionalProperties": false From a355336040854fb57fd0bee5141b8fe30966bc20 Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Fri, 16 Aug 2024 10:49:46 -0700 Subject: [PATCH 11/19] additional debug statement removal --- src/QueryHandlerPMGD.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/QueryHandlerPMGD.cc b/src/QueryHandlerPMGD.cc index 33f97e12..2c17895d 100644 --- a/src/QueryHandlerPMGD.cc +++ b/src/QueryHandlerPMGD.cc @@ -130,7 +130,6 @@ QueryHandlerPMGD::QueryHandlerPMGD() bool QueryHandlerPMGD::syntax_checker(const Json::Value &root, Json::Value &error) { - printf("Syntax Checker\n"); valijson::ValidationResults results; valijson::adapters::JsonCppAdapter user_query(root); if (!_validator.validate(*_schema, user_query, &results)) { @@ -199,7 +198,6 @@ bool QueryHandlerPMGD::syntax_checker(const Json::Value &root, int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, Json::Value &root) { - printf("Parse commands Checker\n"); Json::Reader reader; const std::string commands = proto_query.json(); @@ -245,7 +243,6 @@ int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, root["status"] = RSCommand::Error; return -1; } - printf("Parse COmmands complete\n"); return 0; } @@ -298,20 +295,19 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, Json::StyledWriter w; std::cerr << w.write(json_responses); }; - printf("Parse Command Pre-\n"); + if (parse_commands(proto_query, root) != 0) { cmd_current = "Transaction"; error(root, cmd_current); return; } - printf("Parse Commands Completed, back in proc-query\n"); PMGDQuery pmgd_query(_pmgd_qh); int blob_count = 0; // iterate over the list of the queries for (int j = 0; j < root.size(); j++) { - printf("Iterating over incoming queries: %d...\n", j); + const Json::Value &query = root[j]; std::string cmd = query.getMemberNames()[0]; @@ -345,11 +341,11 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, construct_results.push_back(cmd_result); } - printf("Running PMGD Query\n"); + timers.add_timestamp("pmgd_query_time"); Json::Value &tx_responses = pmgd_query.run(_autodelete_init); timers.add_timestamp("pmgd_query_time"); - printf("PMGD query Complete\n"); + if (!tx_responses.isArray() || tx_responses.size() != root.size()) { Json::StyledWriter writer; @@ -370,7 +366,6 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, } else { blob_count = 0; for (int j = 0; j < root.size(); j++) { - printf("Response iteration: %d\n", j); Json::Value &query = root[j]; std::string cmd = query.getMemberNames()[0]; @@ -406,10 +401,10 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, if (output_query_level_timing) { timers.print_map_runtimes(); } - printf("Writing JSON responses...\n"); + proto_res.set_json(fastWriter.write(json_responses)); _pmgd_qh.cleanup_files(); - printf("Cleaning up!\n"); + } catch (VCL::Exception &e) { print_exception(e); From 1f8186be5e19d7bc536dc9f6582f281a8bbcf508 Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Mon, 19 Aug 2024 13:36:57 -0700 Subject: [PATCH 12/19] Fixed bug on batch insert, was not correctly including descriptor labels as part of metadata --- src/DescriptorsCommand.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index bbaec89e..a74d77aa 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -525,6 +525,8 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, const Json::Value &cmd = jsoncmd[_cmd_name]; const std::string set_name = cmd["set"].asString(); + Json::Value props = get_value(cmd, "properties"); + //extract properties list and get filepath/object location of set Json::Value prop_list = get_value(cmd, "batch_properties"); const std::string set_path = get_set_path(query, set_name, dimensions); @@ -536,6 +538,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, } std::string label = get_value(cmd, "label", "None"); + props[VDMS_DESC_LABEL_PROP] = label; // retrieve the descriptor set from AWS here // operations are currently done in memory with no subsequent write to disk @@ -581,7 +584,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, cur_props = prop_list[i]; //TODO Note using iterator to modify ID return, we're gonna want to watch this closely. cur_props[VDMS_DESC_ID_PROP] = Json::Int64(id+i); - + cur_props[VDMS_DESC_LABEL_PROP] = label; query.AddNode(node_ref, VDMS_DESC_TAG, cur_props, Json::nullValue); From 374a3ecf9baebfb67c307abbd60b2819c1d47619 Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Mon, 19 Aug 2024 13:46:10 -0700 Subject: [PATCH 13/19] tweaked test after last fix --- tests/python/TestDescriptors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/python/TestDescriptors.py b/tests/python/TestDescriptors.py index cee23db9..edc705d1 100644 --- a/tests/python/TestDescriptors.py +++ b/tests/python/TestDescriptors.py @@ -298,7 +298,6 @@ def test_AddSetAndInsertBatch(self): blobs.append(desc_blob) response, img_array = db.query(trans, blobs) - print(response) self.assertEqual(response[0]["AddDescriptor"]["status"], 0) # now try to get those same descriptors back @@ -313,7 +312,7 @@ def test_AddSetAndInsertBatch(self): blobs = [] trans.append(query) response, img_array = db.query(trans, blobs) - #print(response) + self.assertEqual(response[0]["FindDescriptor"]["returned"],10) self.disconnect(db) From 82a979fc48237f6855bd79651ef2db3735f62d7d Mon Sep 17 00:00:00 2001 From: Ian Adams Date: Thu, 22 Aug 2024 13:39:29 -0700 Subject: [PATCH 14/19] test updates, continuing cleanup --- src/DescriptorsCommand.cc | 81 +++++++++++++++++-------------- tests/python/TestDescriptors.py | 86 +++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 37 deletions(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index a74d77aa..91bc38c2 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -352,16 +352,6 @@ long AddDescriptor::insert_descriptor(const std::string &blob, VCL::DescriptorSet *desc_set = _dm->get_descriptors_handler(set_path); - //TODO this check no longer applies, should move it elsewhere - /*if (blob.length() / 4 != dim) { - std::cerr << "AddDescriptor::insert_descriptor: "; - std::cerr << "Dimensions mismatch: "; - std::cerr << blob.length() / 4 << " " << dim << std::endl; - error["info"] = "Blob Dimensions Mismatch"; - return -1; - }*/ - - if (!label.empty()) { long label_id = desc_set->get_label_id(label); long *label_ptr = &label_id; @@ -449,7 +439,6 @@ int AddDescriptor::add_single_descriptor(PMGDQuery &query, retrieve_aws_descriptorSet(set_path); } - //TODO modify insert descriptor to handle batches long id = insert_descriptor(blob, set_path, 1, label, error); if (id < 0) { @@ -517,6 +506,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, const std::string &blob, int grp_id, Json::Value &error){ + const int FOUR_BYTE_INT = 4; int expected_blb_size; int nr_expected_descs; int dimensions; @@ -525,7 +515,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, const Json::Value &cmd = jsoncmd[_cmd_name]; const std::string set_name = cmd["set"].asString(); - Json::Value props = get_value(cmd, "properties"); + //Json::Value props = get_value(cmd, "properties"); //extract properties list and get filepath/object location of set Json::Value prop_list = get_value(cmd, "batch_properties"); @@ -538,7 +528,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, } std::string label = get_value(cmd, "label", "None"); - props[VDMS_DESC_LABEL_PROP] = label; + //props[VDMS_DESC_LABEL_PROP] = label; // retrieve the descriptor set from AWS here // operations are currently done in memory with no subsequent write to disk @@ -550,18 +540,17 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, // Note dimensionse are based on a 32 bit integer, hence the /4 math on size // as the string blob is sized in 8 bit ints. nr_expected_descs = prop_list.size(); - expected_blb_size = nr_expected_descs * dimensions * 4; + expected_blb_size = nr_expected_descs * dimensions * FOUR_BYTE_INT; //Verify length of input is matching expectations if (blob.length() != expected_blb_size) { std::cerr << "AddDescriptor::insert_descriptor: "; - std::cerr << "Expectected Blob Length Does Not Match Input "; - std::cerr << blob.length() << " != " << expected_blb_size << std::endl; + std::cerr << "Expected Blob Length Does Not Match Input "; + std::cerr << "Input Length: " < Date: Mon, 26 Aug 2024 11:49:25 -0700 Subject: [PATCH 15/19] typo in cout, fixed --- src/DescriptorsCommand.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index 91bc38c2..1276333c 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -546,7 +546,7 @@ int AddDescriptor::add_descriptor_batch(PMGDQuery &query, if (blob.length() != expected_blb_size) { std::cerr << "AddDescriptor::insert_descriptor: "; std::cerr << "Expected Blob Length Does Not Match Input "; - std::cerr << "Input Length: " < Date: Thu, 29 Aug 2024 13:42:22 -0700 Subject: [PATCH 16/19] fixed minor typo --- tests/python/TestDescriptors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/TestDescriptors.py b/tests/python/TestDescriptors.py index 327fc957..0488950a 100644 --- a/tests/python/TestDescriptors.py +++ b/tests/python/TestDescriptors.py @@ -385,7 +385,7 @@ def test_AddBatchAndFindKNN(self): descriptor_blob = [] x = np.ones(128) - x[2] = x[2] = 2.34 + 1 * 20 # 2.34 + 1*20 + x[2] = 2.34 + 1 * 20 # 2.34 + 1*20 x = x.astype("float32") descriptor_blob.append(x.tobytes()) From a1bacf798283aa5267fcfc0446cfa6ee8deec38d Mon Sep 17 00:00:00 2001 From: "Chaunte W. Lacewell" Date: Wed, 11 Sep 2024 11:31:05 -0700 Subject: [PATCH 17/19] Fix #213: Report coverage values on manual approvals (#217) * Run Update job even if comparison fails in order to update coverage reports (accounts for manual approvals) * Automated updates: Format and/or coverage * Modify to always run update job --------- Co-authored-by: sys_vdms --- .../coverage/cpp.develop.coverage_report.txt | 6 +- .../coverage/cpp.develop.coverage_value.txt | 2 +- .github/workflows/CI.yml | 3 +- .github/workflows/_CI_coverage.yml | 5 + .github/workflows/_CI_update.yml | 9 +- src/DescriptorsCommand.cc | 379 +++++++++--------- src/DescriptorsCommand.h | 10 +- src/QueryHandlerPMGD.cc | 14 +- tests/python/TestDescriptors.py | 18 +- 9 files changed, 225 insertions(+), 221 deletions(-) diff --git a/.github/coverage/cpp.develop.coverage_report.txt b/.github/coverage/cpp.develop.coverage_report.txt index 672a7da5..c60a3f56 100644 --- a/.github/coverage/cpp.develop.coverage_report.txt +++ b/.github/coverage/cpp.develop.coverage_report.txt @@ -11,7 +11,7 @@ src/BackendNeo4j.cc 121 0 0% 4,6-17,20,24,2 src/BlobCommand.cc 87 66 75% 76,130-132,136-139,145,147,165,186-189,192-196,202 src/BoundingBoxCommand.cc 180 4 2% 45,49,51,53-54,56-59,62,64-67,70-73,76,83,87,90-91,93-97,101,103,105,114,118,122-123,125-132,137-138,140-144,147-150,152,154-160,162-165,167-169,171-173,176-177,179-181,183-184,186-187,190,193,196-197,199,201-204,206-210,213,215-219,222-223,225-227,229-237,240-244,246,251-256,259-261,263,265-266,268,270,272-274,276-277,281-283,286,288,292-294,296,298,300-303,307-308,310-313,316-319,321-326,329-330,335,338-339,341 src/CommunicationManager.cc 46 0 0% 42-43,46-47,49-50,52-54,57,61-66,68-71,73-81,84,86-88,93,96-97,100-101,105,107-108,110,113-116 -src/DescriptorsCommand.cc 602 107 17% 56,63-68,73,75-79,81-85,87,89,92-93,96,98-99,102,104-105,108-114,117,120,123,170-172,176,190-194,234-245,255,269-271,275,290-297,299,311-314,319,324-328,344,352,354-359,362-365,368,371-372,374-378,381,384,386-387,390-392,394-395,397-398,401-402,404-406,412,416,418,420,422-423,426,428-431,437-438,441,443-444,446,448-449,452,455,457,459,462,464-469,471-475,477,480,483-484,487-488,499,502,505,507-509,517,521,523,526,528-531,534-539,541-545,547,550,552,554,557,560-561,563,565,567-572,575,577-579,582-583,585,587,589-590,592,594,596-598,600,602-607,612-613,616,618,620,627-628,631,635,637,640,642-645,648-652,654-658,660,662-665,667-669,672,676-685,690-691,694,701,703,706-707,710,714,720,722,725,728-729,733,738,740,742,745,748-749,751-754,758,762-763,765,767-769,771,773-774,776,778-780,782-784,789-791,794-795,797,800-804,808,811,815,817-818,820-821,823,825-826,828-830,832,837,839-840,842-844,846-847,851,853-856,858-861,866,869-871,873,875,877-880,882-886,889-890,893,895,897,899-903,906-907,910-913,915,917-924,929,931,933-934,937-940,942,944,946-953,957-962,968,971,973,975-978,981,983,986-989,991-995,1001,1003,1005-1008,1013,1015,1017-1018,1021-1023,1025,1027,1029-1032,1035-1036,1038-1039,1041,1043-1044,1046-1052,1056-1057,1061-1062,1064-1065,1073-1074,1077-1078,1080,1082-1089,1093,1097-1098,1102-1106,1111-1112,1114 +src/DescriptorsCommand.cc 668 107 16% 56,63-68,73,75-79,81-85,87,89,92-93,96-98,101,103-104,107-113,115,118,121,168-170,174,188-192,232-243,253,267-269,273,288-295,297,309-312,317,322-326,343,351,353-356,359,362-363,365-369,372,375,377-378,381-383,385-386,388-389,392-393,395-397,403,408-409,411,413-414,417,419-422,425-430,436-437,440,442-443,445,447-448,451,454,456,458,461,463-468,470-474,476,479,482-483,486-487,498,501,506,512-513,518-519,521-524,527,533-534,539-540,543-549,552,554-555,557,559-560,562-563,567,569-574,577-582,585,588-593,595,621-622,625-626,629,632,639-640,642-644,646,649-650,652,655,658,660-662,670,674,676,679,681-684,687-692,694-698,700,703,705,707,710,713-714,716,718,720-725,728,730-732,735-736,738,740,742-743,745,747,749-751,753,755-760,765-766,769,771,773,780-781,784,788,790,793,795-798,801-805,807-811,813,815-818,820-822,825,829-838,843-844,847,853,855,858-859,862,866,871,873,876,879-880,884,889,891,893,896,899-900,902-905,909,913-914,916,918-920,922,924-925,927,929-931,933-935,940-942,945-946,948,951-955,959,962,966,968-969,971-972,974,976-977,979-981,983,988,990-991,993-995,997-998,1002,1004-1007,1009-1012,1017,1020-1022,1024,1026,1028-1031,1033-1037,1040-1041,1044,1046,1048,1050-1054,1057-1058,1061-1064,1066,1068-1075,1080,1082,1084-1085,1088-1091,1093,1095,1097-1104,1108-1113,1119,1122,1124,1126-1129,1132,1134,1137-1140,1142-1146,1152,1154,1156-1159,1164,1166,1168-1169,1172-1174,1176,1178,1180-1183,1186-1187,1189-1190,1192,1194-1195,1197-1203,1207-1208,1212-1213,1215-1216,1224-1225,1228-1229,1231,1233-1240,1244,1248-1249,1253-1257,1262-1263,1265 src/DescriptorsManager.cc 24 19 79% 49-50,57-58,73 src/ExceptionsCommand.cc 6 0 0% 35-40 src/ImageCommand.cc 322 157 48% 55,59,63,65,67-69,71,73-76,78,81,86,88-89,97,99,106,109,111-112,114-115,117-118,120-121,124,151,162-163,174-175,177,182-185,195-196,198,203-206,221-229,231-233,246-247,257-267,269-270,272-273,278,286,297,304,308,311,313,315,337,339-340,343-348,350,352,374-376,379-381,385-388,394,396,403-406,420,427,433-436,440-441,452-455,458-463,468-470,481-484,489-493,498-499,501-502,504-508,511,513-517,520-523,526-527,530,532,537 @@ -25,7 +25,7 @@ src/PMGDQueryHandler.cc 623 517 83% 82-84,166-167, src/QueryHandlerBase.cc 32 6 18% 26-29,35-36,38,42-43,45,47-48,52,56-58,60-62,64-66,68-69,72-73 src/QueryHandlerExample.cc 33 18 54% 65-67,75-78,84-85,89-92,94-95 src/QueryHandlerNeo4j.cc 139 0 0% 53,55-56,58,60-62,64-65,67,70-76,80-81,83-87,91,93,95-100,104-108,111-115,119-126,129-132,134-136,139-147,149-153,159,162,169,172-175,177-179,181,184-187,189-190,192,194,197,199,201-204,207-208,210,212-213,216,218,222-223,225,229,231-232,235,237-240,243-247,250-254,256-257,261-268,271-274,277 -src/QueryHandlerPMGD.cc 344 226 65% 109-111,119-122,137-138,142-146,149-153,157-164,167-170,172-174,183-185,189-191,209-211,216-218,233-239,243-245,262,264-273,302-304,350-352,354-356,359-360,362-365,388-389,391-392,401,407-419,421-423,430-438,475,477,532-533,535-536 +src/QueryHandlerPMGD.cc 344 226 65% 109-111,119-122,137-138,142-146,149-153,157-164,167-170,172-174,183-185,189-191,209-211,216-218,233-239,243-245,263,265-274,303-305,352-354,356-358,361-362,364-367,390-391,393-394,403,410-422,424-426,433-441,478,480,535-536,538-539 src/QueryMessage.cc 12 0 0% 37-39,42-43,45-46,48,51-54 src/RSCommand.cc 144 105 72% 65-67,73-74,98,100-101,103,110,131,134-138,141,172-174,176,178-181,188,262,285,287-289,291-297,301 src/SearchExpression.cc 99 38 38% 59,132-133,135,137-139,143,146,148-153,157,160,168-170,177,180-181,183-185,188,192-195,197,201,217-222,224-225,227,235-240,243,247-249,252-256,263,276,284-285 @@ -57,5 +57,5 @@ utils/src/comm/Exception.cc 6 0 0% 35-40 utils/src/stats/SystemStats.cc 250 249 99% 453 utils/src/timers/TimerMap.cc 82 75 91% 126,151,153,155-158 ------------------------------------------------------------------------------ -TOTAL 10076 6488 64% +TOTAL 10142 6488 64% ------------------------------------------------------------------------------ diff --git a/.github/coverage/cpp.develop.coverage_value.txt b/.github/coverage/cpp.develop.coverage_value.txt index 7136165a..50b05238 100644 --- a/.github/coverage/cpp.develop.coverage_value.txt +++ b/.github/coverage/cpp.develop.coverage_value.txt @@ -1 +1 @@ -64.3906 +63.9716 diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9397495f..5c5f22ff 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -55,9 +55,10 @@ jobs: Update: # name: Lint & Update Reports needs: [Testing, Results] - if: ${{ always() && (needs.Results.result == 'success') }} + if: ${{ always() }} uses: ./.github/workflows/_CI_update.yml with: coverage_value_updated: ${{ needs.Testing.outputs.coverage_value_updated }} + coverage_test_status: ${{ needs.Testing.outputs.coverage_test_status }} secrets: inherit diff --git a/.github/workflows/_CI_coverage.yml b/.github/workflows/_CI_coverage.yml index 2183ee0f..765aca59 100644 --- a/.github/workflows/_CI_coverage.yml +++ b/.github/workflows/_CI_coverage.yml @@ -103,6 +103,7 @@ jobs: source_old_cpp_value: ${{ steps.report_coverage.outputs.source_old_cpp_value }} source_old_py_value: ${{ steps.report_coverage.outputs.source_old_py_value }} coverage_value_updated: ${{ steps.report_coverage.outputs.cov_changed }} + coverage_test_status: ${{ steps.report_coverage.outputs.coverage_test_status }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: @@ -236,6 +237,7 @@ jobs: set -x did_cov_change='false' + test_status='failed' if [ "$pr_dev_value_cpp" != "$coverage_value_cpp" ]; then did_cov_change='true' fi @@ -262,10 +264,13 @@ jobs: then exit 1 fi + + test_status='passed' echo "Source Python Coverage: ${coverage_value_py}" echo "source_coverage_py=${coverage_value_py}" >> $GITHUB_OUTPUT echo "source_old_py_value=${pr_dev_value_py}" >> $GITHUB_OUTPUT # echo "target_coverage_py=${target_value_py}" >> $GITHUB_OUTPUT + echo "coverage_test_status=${test_status}" >> $GITHUB_OUTPUT - name: Upload New coverage results uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 diff --git a/.github/workflows/_CI_update.yml b/.github/workflows/_CI_update.yml index cec65624..5f67cd4d 100644 --- a/.github/workflows/_CI_update.yml +++ b/.github/workflows/_CI_update.yml @@ -7,6 +7,10 @@ on: required: true description: "The C++ Coverage for target" type: string + coverage_test_status: + required: true + description: "Status of coverage tests (passed/failed)" + type: string permissions: write-all @@ -19,7 +23,6 @@ jobs: runs-on: ubuntu-latest steps: # Checkout code doesn't persist across jobs - # If formatting needed, checkout and format again - name: Checkout Source Branch uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: @@ -31,7 +34,7 @@ jobs: - run: mkdir -p ${{ env.DOCKER_ARTIFACT_DIR }} - name: Retrieve Current Coverage Files - if: ${{ inputs.coverage_value_updated }} == 'true' + if: ${{ inputs.coverage_value_updated }} == 'true' && ${{ inputs.coverage_test_status }} == 'passed' uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 with: name: coverage_artifact @@ -43,7 +46,7 @@ jobs: - name: Update coverage reports with latest coverage # Change latest coverage as develop (future target) - if: ${{ inputs.coverage_value_updated }} == 'true' + if: ${{ inputs.coverage_value_updated }} == 'true' && ${{ inputs.coverage_test_status }} == 'passed' run: | cd ${GITHUB_WORKSPACE}/.github/coverage/ rm -rf *.develop.*.txt || true diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index 1276333c..aaf5737e 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -84,10 +84,8 @@ std::string DescriptorsCommand::get_set_path(PMGDQuery &query_tx, list_arr.append(VDMS_DESC_SET_DIM_PROP); list_arr.append(VDMS_DESC_SET_ENGIN_PROP); - results["list"] = list_arr; - bool unique = true; // Query set node @@ -341,7 +339,7 @@ AddDescriptor::AddDescriptor() : DescriptorsCommand("AddDescriptor") { //_use_aws_storage = VDMSConfig::instance()->get_aws_flag(); } -//update to handle multiple descriptors at a go +// update to handle multiple descriptors at a go long AddDescriptor::insert_descriptor(const std::string &blob, const std::string &set_path, int nr_desc, const std::string &label, @@ -403,231 +401,232 @@ void AddDescriptor::retrieve_aws_descriptorSet(const std::string &set_path) { } int AddDescriptor::add_single_descriptor(PMGDQuery &query, - const Json::Value &jsoncmd, - const std::string &blob, int grp_id, - Json::Value &error){ - - const Json::Value &cmd = jsoncmd[_cmd_name]; - const std::string set_name = cmd["set"].asString(); + const Json::Value &jsoncmd, + const std::string &blob, int grp_id, + Json::Value &error) { - Json::Value props = get_value(cmd, "properties"); + const Json::Value &cmd = jsoncmd[_cmd_name]; + const std::string set_name = cmd["set"].asString(); - std::string label = get_value(cmd, "label", "None"); - props[VDMS_DESC_LABEL_PROP] = label; + Json::Value props = get_value(cmd, "properties"); - int dim; - const std::string set_path = get_set_path(query, set_name, dim); + std::string label = get_value(cmd, "label", "None"); + props[VDMS_DESC_LABEL_PROP] = label; - if (set_path.empty()) { - error["info"] = "Set " + set_name + " not found"; - error["status"] = RSCommand::Error; - return -1; - } + int dim; + const std::string set_path = get_set_path(query, set_name, dim); - if (blob.length() / 4 != dim) { - std::cerr << "AddDescriptor::insert_descriptor: "; - std::cerr << "Dimensions mismatch: "; - std::cerr << blob.length() / 4 << " " << dim << std::endl; - error["info"] = "Blob Dimensions Mismatch"; - return -1; - } + if (set_path.empty()) { + error["info"] = "Set " + set_name + " not found"; + error["status"] = RSCommand::Error; + return -1; + } - // retrieve the descriptor set from AWS here - // operations are currently done in memory with no subsequent write to disk - // so there's no need to re-upload to AWS - if (_use_aws_storage) { - retrieve_aws_descriptorSet(set_path); - } + if (blob.length() / 4 != dim) { + std::cerr << "AddDescriptor::insert_descriptor: "; + std::cerr << "Dimensions mismatch: "; + std::cerr << blob.length() / 4 << " " << dim << std::endl; + error["info"] = "Blob Dimensions Mismatch"; + return -1; + } - long id = insert_descriptor(blob, set_path, 1, label, error); + // retrieve the descriptor set from AWS here + // operations are currently done in memory with no subsequent write to disk + // so there's no need to re-upload to AWS + if (_use_aws_storage) { + retrieve_aws_descriptorSet(set_path); + } - if (id < 0) { - error["status"] = RSCommand::Error; + long id = insert_descriptor(blob, set_path, 1, label, error); - if (_use_aws_storage) { - // delete files in set_path - std::uintmax_t n = fs::remove_all(set_path); - std::cout << "Deleted " << n << " files or directories\n"; - } + if (id < 0) { + error["status"] = RSCommand::Error; - return -1; + if (_use_aws_storage) { + // delete files in set_path + std::uintmax_t n = fs::remove_all(set_path); + std::cout << "Deleted " << n << " files or directories\n"; } - props[VDMS_DESC_ID_PROP] = Json::Int64(id); + return -1; + } - int node_ref = get_value(cmd, "_ref", query.get_available_reference()); + props[VDMS_DESC_ID_PROP] = Json::Int64(id); - query.AddNode(node_ref, VDMS_DESC_TAG, props, Json::nullValue); + int node_ref = get_value(cmd, "_ref", query.get_available_reference()); - // It passed the checker, so it exists. - int set_ref = query.get_available_reference(); + query.AddNode(node_ref, VDMS_DESC_TAG, props, Json::nullValue); - Json::Value link; - Json::Value results; - Json::Value list_arr; - list_arr.append(VDMS_DESC_SET_PATH_PROP); - list_arr.append(VDMS_DESC_SET_DIM_PROP); - results["list"] = list_arr; + // It passed the checker, so it exists. + int set_ref = query.get_available_reference(); - Json::Value constraints; - Json::Value name_arr; - name_arr.append("=="); - name_arr.append(set_name); - constraints[VDMS_DESC_SET_NAME_PROP] = name_arr; + Json::Value link; + Json::Value results; + Json::Value list_arr; + list_arr.append(VDMS_DESC_SET_PATH_PROP); + list_arr.append(VDMS_DESC_SET_DIM_PROP); + results["list"] = list_arr; - bool unique = true; + Json::Value constraints; + Json::Value name_arr; + name_arr.append("=="); + name_arr.append(set_name); + constraints[VDMS_DESC_SET_NAME_PROP] = name_arr; - // Query set node - query.QueryNode(set_ref, VDMS_DESC_SET_TAG, link, constraints, results, - unique); + bool unique = true; - if (cmd.isMember("link")) { - add_link(query, cmd["link"], node_ref, VDMS_DESC_EDGE_TAG); - } + // Query set node + query.QueryNode(set_ref, VDMS_DESC_SET_TAG, link, constraints, results, + unique); - Json::Value props_edge; - query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); + if (cmd.isMember("link")) { + add_link(query, cmd["link"], node_ref, VDMS_DESC_EDGE_TAG); + } - // TODO: deleting files here causes problems with concurrency (TestRetail.py) - // keeping local copies as a temporary solution - // if(_use_aws_storage) - // { - // //delete files in set_path - // std::uintmax_t n = fs::remove_all(set_path); - // std::cout << "Deleted " << n << " files or directories\n"; - // } + Json::Value props_edge; + query.AddEdge(-1, set_ref, node_ref, VDMS_DESC_SET_EDGE_TAG, props_edge); - return 0; + // TODO: deleting files here causes problems with concurrency (TestRetail.py) + // keeping local copies as a temporary solution + // if(_use_aws_storage) + // { + // //delete files in set_path + // std::uintmax_t n = fs::remove_all(set_path); + // std::cout << "Deleted " << n << " files or directories\n"; + // } + return 0; } int AddDescriptor::add_descriptor_batch(PMGDQuery &query, - const Json::Value &jsoncmd, - const std::string &blob, int grp_id, - Json::Value &error){ + const Json::Value &jsoncmd, + const std::string &blob, int grp_id, + Json::Value &error) { - const int FOUR_BYTE_INT = 4; - int expected_blb_size; - int nr_expected_descs; - int dimensions; + const int FOUR_BYTE_INT = 4; + int expected_blb_size; + int nr_expected_descs; + int dimensions; - //Extract set name - const Json::Value &cmd = jsoncmd[_cmd_name]; - const std::string set_name = cmd["set"].asString(); + // Extract set name + const Json::Value &cmd = jsoncmd[_cmd_name]; + const std::string set_name = cmd["set"].asString(); - //Json::Value props = get_value(cmd, "properties"); + // Json::Value props = get_value(cmd, "properties"); - //extract properties list and get filepath/object location of set - Json::Value prop_list = get_value(cmd, "batch_properties"); - const std::string set_path = get_set_path(query, set_name, dimensions); + // extract properties list and get filepath/object location of set + Json::Value prop_list = get_value(cmd, "batch_properties"); + const std::string set_path = get_set_path(query, set_name, dimensions); - if (set_path.empty()) { - error["info"] = "Set " + set_name + " not found"; - error["status"] = RSCommand::Error; - return -1; - } + if (set_path.empty()) { + error["info"] = "Set " + set_name + " not found"; + error["status"] = RSCommand::Error; + return -1; + } + + std::string label = get_value(cmd, "label", "None"); + // props[VDMS_DESC_LABEL_PROP] = label; - std::string label = get_value(cmd, "label", "None"); - //props[VDMS_DESC_LABEL_PROP] = label; + // retrieve the descriptor set from AWS here + // operations are currently done in memory with no subsequent write to disk + // so there's no need to re-upload to AWS + if (_use_aws_storage) { + retrieve_aws_descriptorSet(set_path); + } + + // Note dimensionse are based on a 32 bit integer, hence the /4 math on size + // as the string blob is sized in 8 bit ints. + nr_expected_descs = prop_list.size(); + expected_blb_size = nr_expected_descs * dimensions * FOUR_BYTE_INT; + + // Verify length of input is matching expectations + if (blob.length() != expected_blb_size) { + std::cerr << "AddDescriptor::insert_descriptor: "; + std::cerr << "Expected Blob Length Does Not Match Input "; + std::cerr << "Input Length: " << blob.length() << " != " + << "Expected Length: " << expected_blb_size << std::endl; + error["info"] = "FV Input Length Mismatch"; + return -1; + } + + long id = insert_descriptor(blob, set_path, nr_expected_descs, label, error); + + if (id < 0) { + error["status"] = RSCommand::Error; - // retrieve the descriptor set from AWS here - // operations are currently done in memory with no subsequent write to disk - // so there's no need to re-upload to AWS if (_use_aws_storage) { - retrieve_aws_descriptorSet(set_path); + // delete files in set_path + std::uintmax_t n = fs::remove_all(set_path); + std::cout << "Deleted " << n << " files or directories\n"; } + error["info"] = "FV Index Insert Failed"; + return -1; + } - // Note dimensionse are based on a 32 bit integer, hence the /4 math on size - // as the string blob is sized in 8 bit ints. - nr_expected_descs = prop_list.size(); - expected_blb_size = nr_expected_descs * dimensions * FOUR_BYTE_INT; - - //Verify length of input is matching expectations - if (blob.length() != expected_blb_size) { - std::cerr << "AddDescriptor::insert_descriptor: "; - std::cerr << "Expected Blob Length Does Not Match Input "; - std::cerr << "Input Length: " <(cmd, "batch_properties"); - if(prop_list.size() == 0){ - rc = add_single_descriptor(query, jsoncmd, blob, grp_id, error); + if (prop_list.size() == 0) { + rc = add_single_descriptor(query, jsoncmd, blob, grp_id, error); } else { - rc = add_descriptor_batch(query, jsoncmd, blob, grp_id, error); + rc = add_descriptor_batch(query, jsoncmd, blob, grp_id, error); } - if(rc < 0) error["status"] = RSCommand::Error; - + if (rc < 0) + error["status"] = RSCommand::Error; return rc; - } Json::Value AddDescriptor::construct_responses( diff --git a/src/DescriptorsCommand.h b/src/DescriptorsCommand.h index 0199dd99..4bcf4ee3 100644 --- a/src/DescriptorsCommand.h +++ b/src/DescriptorsCommand.h @@ -59,7 +59,7 @@ class DescriptorsCommand : public RSCommand { tbb::concurrent_unordered_map _cache_map; static tbb::concurrent_unordered_map - _desc_set_locator; + _desc_set_locator; static tbb::concurrent_unordered_map _desc_set_dims; // Will return the path to the set and the dimensions @@ -133,12 +133,12 @@ class AddDescriptor : public DescriptorsCommand { void retrieve_aws_descriptorSet(const std::string &set_path); int add_single_descriptor(PMGDQuery &tx, const Json::Value &root, - const std::string &blob, int grp_id, - Json::Value &error); + const std::string &blob, int grp_id, + Json::Value &error); int add_descriptor_batch(PMGDQuery &tx, const Json::Value &root, - const std::string &blob, int grp_id, - Json::Value &error); + const std::string &blob, int grp_id, + Json::Value &error); public: AddDescriptor(); diff --git a/src/QueryHandlerPMGD.cc b/src/QueryHandlerPMGD.cc index 2c17895d..214318e8 100644 --- a/src/QueryHandlerPMGD.cc +++ b/src/QueryHandlerPMGD.cc @@ -63,7 +63,8 @@ std::unordered_map QueryHandlerPMGD::_rs_cmds; // DescriptorCommand.h tbb::concurrent_unordered_map DescriptorsCommand::_desc_set_locator; -tbb::concurrent_unordered_map DescriptorsCommand::_desc_set_dims; +tbb::concurrent_unordered_map + DescriptorsCommand::_desc_set_dims; void QueryHandlerPMGD::init() { DescriptorsManager::init(); @@ -198,7 +199,7 @@ bool QueryHandlerPMGD::syntax_checker(const Json::Value &root, int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, Json::Value &root) { - Json::Reader reader; + Json::Reader reader; const std::string commands = proto_query.json(); try { @@ -246,8 +247,10 @@ int QueryHandlerPMGD::parse_commands(const protobufs::queryMessage &proto_query, return 0; } -void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, - protobufs::queryMessage &proto_res) { //TODO Investigate why/where json throwing +void QueryHandlerPMGD::process_query( + protobufs::queryMessage &proto_query, + protobufs::queryMessage + &proto_res) { // TODO Investigate why/where json throwing Json::FastWriter fastWriter; Json::Value root; @@ -341,12 +344,10 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, construct_results.push_back(cmd_result); } - timers.add_timestamp("pmgd_query_time"); Json::Value &tx_responses = pmgd_query.run(_autodelete_init); timers.add_timestamp("pmgd_query_time"); - if (!tx_responses.isArray() || tx_responses.size() != root.size()) { Json::StyledWriter writer; std::cerr << "PMGD Response:" << std::endl; @@ -405,7 +406,6 @@ void QueryHandlerPMGD::process_query(protobufs::queryMessage &proto_query, proto_res.set_json(fastWriter.write(json_responses)); _pmgd_qh.cleanup_files(); - } catch (VCL::Exception &e) { print_exception(e); error_msg << "Internal Server Error: VCL Exception at QH" << std::endl; diff --git a/tests/python/TestDescriptors.py b/tests/python/TestDescriptors.py index 0488950a..658b6677 100644 --- a/tests/python/TestDescriptors.py +++ b/tests/python/TestDescriptors.py @@ -208,7 +208,6 @@ def test_addSetAndDescriptorsDimMismatch(self): def test_AddSetAndWrongBatchSize(self): - db = self.create_connection() # Create and verify descriptor set @@ -224,7 +223,7 @@ def test_AddSetAndWrongBatchSize(self): trans_list.append(trans_dict) response, img_array = db.query(trans_list) - self.assertEqual(response[0]["AddDescriptorSet"]["status"],0) + self.assertEqual(response[0]["AddDescriptorSet"]["status"], 0) # Create and add a batch of feature vectors trans = [] @@ -236,7 +235,7 @@ def test_AddSetAndWrongBatchSize(self): x = x.astype("float32") desc_blob.append(x.tobytes()) - properties_list=[] + properties_list = [] for x in range(batch_size + 3): props = {"batchprop": x} properties_list.append(props) @@ -272,7 +271,7 @@ def test_AddSetAndInsertBatch(self): trans_list.append(trans_dict) response, img_array = db.query(trans_list) - self.assertEqual(response[0]["AddDescriptorSet"]["status"],0) + self.assertEqual(response[0]["AddDescriptorSet"]["status"], 0) # Create and add a batch of feature vectors trans = [] @@ -284,7 +283,7 @@ def test_AddSetAndInsertBatch(self): x = x.astype("float32") desc_blob.append(x.tobytes()) - properties_list=[] + properties_list = [] for x in range(batch_size): props = {"batchprop": x} properties_list.append(props) @@ -303,7 +302,7 @@ def test_AddSetAndInsertBatch(self): # now try to get those same descriptors back desc_find = {} desc_find["set"] = "rightbatchsize" - desc_find["results"] = {"list":["batchprop"]} + desc_find["results"] = {"list": ["batchprop"]} query = {} query["FindDescriptor"] = desc_find @@ -312,7 +311,7 @@ def test_AddSetAndInsertBatch(self): blobs = [] trans.append(query) response, img_array = db.query(trans, blobs) - self.assertEqual(response[0]["FindDescriptor"]["returned"],10) + self.assertEqual(response[0]["FindDescriptor"]["returned"], 10) self.disconnect(db) @@ -333,7 +332,7 @@ def test_AddBatchAndFindKNN(self): trans_list.append(trans_dict) response, img_array = db.query(trans_list) - self.assertEqual(response[0]["AddDescriptorSet"]["status"],0) + self.assertEqual(response[0]["AddDescriptorSet"]["status"], 0) # Descriptor Set Created, now lets create a batch to insert # first lets make a big combined blob representing the inserted descriptor @@ -344,7 +343,7 @@ def test_AddBatchAndFindKNN(self): desc_blob = [] x = np.ones(nr_dims * batch_size) for i in range(batch_size): - x[2 + (i*nr_dims)] = 2.34 + i * 20 + x[2 + (i * nr_dims)] = 2.34 + i * 20 x = x.astype("float32") desc_blob.append(x.tobytes()) @@ -402,7 +401,6 @@ def test_AddBatchAndFindKNN(self): self.assertEqual(response[0]["FindDescriptor"]["entities"][2]["_distance"], 400) self.disconnect(db) - def test_classifyDescriptor(self): db = self.create_connection() set_name = "features_128d_4_classify" From 601b8e058d476f8585a122ca7d60d62b30498844 Mon Sep 17 00:00:00 2001 From: Ian Date: Wed, 11 Sep 2024 13:04:00 -0700 Subject: [PATCH 18/19] removing unique constraint for link based descriptor queries (#215) Co-authored-by: Chaunte W. Lacewell --- src/DescriptorsCommand.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DescriptorsCommand.cc b/src/DescriptorsCommand.cc index aaf5737e..df92b699 100644 --- a/src/DescriptorsCommand.cc +++ b/src/DescriptorsCommand.cc @@ -860,7 +860,7 @@ int FindDescriptor::construct_protobuf(PMGDQuery &query, // Query for the set query.QueryNode(-1, VDMS_DESC_SET_TAG, link_to_desc, constraints_set, - results_set, unique); + results_set, false); } // Case (2) else if (!cmd.isMember("k_neighbors")) { From 860affeceddf4118260684a262310db292713038 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:09:33 -0700 Subject: [PATCH 19/19] Bump cryptography from 42.0.8 to 43.0.1 in /.github (#218) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.8 to 43.0.1. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.8...43.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/requirements.txt b/.github/requirements.txt index 54341570..123e5765 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -3,7 +3,7 @@ cffi==1.16.0 click==8.1.7 colorlog==6.8.2 coverage==7.6.0 -cryptography==42.0.8 +cryptography==43.0.1 Flask==3.0.3 gcovr==7.2 imutils==0.5.4