diff --git a/src/qlever-petrimaps/GeomCache.cpp b/src/qlever-petrimaps/GeomCache.cpp index c4a667e..df5e025 100644 --- a/src/qlever-petrimaps/GeomCache.cpp +++ b/src/qlever-petrimaps/GeomCache.cpp @@ -5,11 +5,11 @@ #include #include +#include #include #include #include #include -#include #include #include "qlever-petrimaps/GeomCache.h" @@ -27,315 +27,346 @@ using util::geo::latLngToWebMerc; const static std::string QUERY = "PREFIX geo: " - "SELECT DISTINCT ?geometry WHERE {" + "SELECT ?geometry WHERE {" " ?osm_id geo:hasGeometry ?geometry " - " } INTERNAL SORT BY ?geometry"; + "} INTERNAL SORT BY ?geometry"; const static std::string COUNT_QUERY = "PREFIX geo: " "SELECT (COUNT(?geometry) as ?count) WHERE { " - "SELECT DISTINCT ?geometry WHERE {" " ?osm_id geo:hasGeometry ?geometry " - "} INTERNAL SORT BY ?geometry }"; + "}"; + +const static std::string QUERY_ASWKT = + "PREFIX geo: " + "SELECT ?geometry WHERE {" + " ?osm_id geo:hasGeometry ?m . ?m geo:asWKT ?geometry " + "} INTERNAL SORT BY ?geometry"; + +const static std::string COUNT_QUERY_ASWKT = + "PREFIX geo: " + "SELECT (COUNT(?geometry) AS ?count) WHERE {" + " ?osm_id geo:hasGeometry ?m . ?m geo:asWKT ?geometry " + "}"; const static std::string QUERY_WD = "PREFIX wdt: " - "SELECT DISTINCT ?coord WHERE {" + "SELECT ?coord WHERE {" " ?ob wdt:P625 ?coord" "} INTERNAL SORT BY ?coord"; const static std::string COUNT_QUERY_WD = "PREFIX wdt: " "SELECT (COUNT(?coord) as ?count) WHERE { " - "SELECT DISTINCT ?coord WHERE {" " ?ob wdt:P625 ?coord" - "} INTERNAL SORT BY ?coord }"; + "}"; + +// Helper function that returns one of the given three query strings based on +// the `backendUrl`. Used for `getQuery` and `getCountQuery` below. +static const std::string &selectQueryBasedOnUrl(const std::string &backendUrl, + const std::string &query1, + const std::string &query2, + const std::string &query3) { + // Helper lambda that returns true if the backend (part after the final + // slash) starts with the given prefix. + size_t pos = backendUrl.find_last_of('/'); + pos = pos != std::string::npos ? pos + 1 : 0; + auto backendStartsWith = [&pos, &backendUrl](const std::string &prefix) { + return backendUrl.find(prefix, pos) == pos; + }; + if (backendStartsWith("osm")) { + return query1; + } else if (backendStartsWith("wikidata") || backendStartsWith("dblp")) { + return query2; + } else { + return query3; + } +} // _____________________________________________________________________________ -const std::string& GeomCache::getQuery(const std::string& backendUrl) const { - bool is_wd = util::endsWith(backendUrl, "wikidata") || - util::endsWith(backendUrl, "dblp-plus"); - return is_wd ? QUERY_WD : QUERY; +const std::string &GeomCache::getQuery(const std::string &backendUrl) const { + return selectQueryBasedOnUrl(backendUrl, QUERY_ASWKT, QUERY_WD, QUERY); } // _____________________________________________________________________________ -const std::string& GeomCache::getCountQuery( - const std::string& backendUrl) const { - bool is_wd = util::endsWith(backendUrl, "wikidata") || - util::endsWith(backendUrl, "dblp-plus"); - return is_wd ? COUNT_QUERY_WD : COUNT_QUERY; +const std::string & +GeomCache::getCountQuery(const std::string &backendUrl) const { + return selectQueryBasedOnUrl(backendUrl, COUNT_QUERY_ASWKT, COUNT_QUERY_WD, + COUNT_QUERY); } // _____________________________________________________________________________ -size_t GeomCache::writeCbString(void* contents, size_t size, size_t nmemb, - void* userp) { - ((std::string*)userp)->append((char*)contents, size * nmemb); +size_t GeomCache::writeCbString(void *contents, size_t size, size_t nmemb, + void *userp) { + ((std::string *)userp)->append((char *)contents, size * nmemb); return size * nmemb; } // _____________________________________________________________________________ -size_t GeomCache::writeCb(void* contents, size_t size, size_t nmemb, - void* userp) { +size_t GeomCache::writeCb(void *contents, size_t size, size_t nmemb, + void *userp) { size_t realsize = size * nmemb; try { - static_cast(userp)->parse(static_cast(contents), - realsize); + static_cast(userp)->parse(static_cast(contents), + realsize); } catch (...) { - static_cast(userp)->_exceptionPtr = std::current_exception(); + static_cast(userp)->_exceptionPtr = std::current_exception(); return CURLE_WRITE_ERROR; } return realsize; } // _____________________________________________________________________________ -size_t GeomCache::writeCbIds(void* contents, size_t size, size_t nmemb, - void* userp) { +size_t GeomCache::writeCbIds(void *contents, size_t size, size_t nmemb, + void *userp) { size_t realsize = size * nmemb; try { - static_cast(userp)->parseIds(static_cast(contents), - realsize); + static_cast(userp)->parseIds( + static_cast(contents), realsize); } catch (...) { - static_cast(userp)->_exceptionPtr = std::current_exception(); + static_cast(userp)->_exceptionPtr = std::current_exception(); return CURLE_WRITE_ERROR; } return realsize; } // _____________________________________________________________________________ -size_t GeomCache::writeCbCount(void* contents, size_t size, size_t nmemb, - void* userp) { +size_t GeomCache::writeCbCount(void *contents, size_t size, size_t nmemb, + void *userp) { size_t realsize = size * nmemb; try { - static_cast(userp)->parseCount( - static_cast(contents), realsize); + static_cast(userp)->parseCount( + static_cast(contents), realsize); } catch (...) { - static_cast(userp)->_exceptionPtr = std::current_exception(); + static_cast(userp)->_exceptionPtr = std::current_exception(); return CURLE_WRITE_ERROR; } return realsize; } // _____________________________________________________________________________ -void GeomCache::parse(const char* c, size_t size) { +void GeomCache::parse(const char *c, size_t size) { _loadStatusStage = _LoadStatusStages::Parse; - const char* start = c; + const char *start = c; while (c < start + size) { - if (_raw.size() < 10000) _raw.push_back(*c); + if (_raw.size() < 10000) + _raw.push_back(*c); switch (_state) { - case IN_HEADER: - if (*c == '\n') { - _state = IN_ROW; - c++; - continue; - } else { - c++; - continue; - } - case IN_ROW: - if (*c == '\t' || *c == '\n') { - // bool isGeom = util::endsWith( - // _dangling, "^^"); - - bool isGeom = true; - - auto p = _dangling.rfind("\"POINT(", 0); + case IN_HEADER: + if (*c == '\n') { + _state = IN_ROW; + c++; + continue; + } else { + c++; + continue; + } + case IN_ROW: + if (*c == '\t' || *c == '\n') { + // bool isGeom = util::endsWith( + // _dangling, "^^"); + + bool isGeom = true; + + auto p = _dangling.rfind("\"POINT(", 0); + + // if the previous was not a multi geometry, and if the strings + // match exactly, re-use the geometry + if (isGeom && _prev == _dangling && _lastQidToId.qid == 0) { + IdMapping idm{0, _lastQidToId.id}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } else if (isGeom && p != std::string::npos) { + _curUniqueGeom++; + p += 7; + auto point = parsePoint(_dangling, p); + if (pointValid(point)) { + _pointsF.write(reinterpret_cast(&point), + sizeof(util::geo::FPoint)); + _pointsFSize++; + IdMapping idm{0, _pointsFSize - 1}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } else { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } + } else if (isGeom && (p = _dangling.rfind("\"LINESTRING(", 0)) != + std::string::npos) { + _curUniqueGeom++; + p += 12; + const auto &line = parseLineString(_dangling, p); + if (line.size() == 0) { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } else { + _linesF.write(reinterpret_cast(&_linePointsFSize), + sizeof(size_t)); + _linesFSize++; + insertLine(line, false); - // if the previous was not a multi geometry, and if the strings - // match exactly, re-use the geometry - if (isGeom && _prev == _dangling && _lastQidToId.qid == 0) { - IdMapping idm{0, _lastQidToId.id}; + IdMapping idm{0, I_OFFSET + _linesFSize - 1}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; - } else if (isGeom && p != std::string::npos) { - _curUniqueGeom++; - p += 7; - auto point = parsePoint(_dangling, p); - if (pointValid(point)) { - _pointsF.write(reinterpret_cast(&point), - sizeof(util::geo::FPoint)); - _pointsFSize++; - IdMapping idm{0, _pointsFSize - 1}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } else { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - } else if (isGeom && (p = _dangling.rfind("\"LINESTRING(", 0)) != - std::string::npos) { - _curUniqueGeom++; - p += 12; - const auto& line = parseLineString(_dangling, p); + } + } else if (isGeom && (p = _dangling.rfind("\"MULTILINESTRING(", 0)) != + std::string::npos) { + _curUniqueGeom++; + p += 17; + size_t i = 0; + while ((p = _dangling.find("(", p + 1)) != std::string::npos) { + const auto &line = parseLineString(_dangling, p + 1); if (line.size() == 0) { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; + if (i == 0) { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } } else { - _linesF.write(reinterpret_cast(&_linePointsFSize), + _linesF.write(reinterpret_cast(&_linePointsFSize), sizeof(size_t)); _linesFSize++; insertLine(line, false); - IdMapping idm{0, I_OFFSET + _linesFSize - 1}; + IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } - } else if (isGeom && (p = _dangling.rfind("\"MULTILINESTRING(", 0)) != - std::string::npos) { - _curUniqueGeom++; - p += 17; - size_t i = 0; - while ((p = _dangling.find("(", p + 1)) != std::string::npos) { - const auto& line = parseLineString(_dangling, p + 1); - if (line.size() == 0) { - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - } else { - _linesF.write(reinterpret_cast(&_linePointsFSize), - sizeof(size_t)); - _linesFSize++; - insertLine(line, false); - - IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - i++; - } - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - } else if (isGeom && (p = _dangling.rfind("\"POLYGON(", 0)) != - std::string::npos) { - _curUniqueGeom++; - p += 9; - size_t i = 0; - while ((p = _dangling.find("(", p + 1)) != std::string::npos) { - const auto& line = parseLineString(_dangling, p + 1); - if (line.size() == 0) { - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - } else { - _linesF.write(reinterpret_cast(&_linePointsFSize), - sizeof(size_t)); - _linesFSize++; - insertLine(line, true); - - IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; + i++; + } + if (i == 0) { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } + } else if (isGeom && (p = _dangling.rfind("\"POLYGON(", 0)) != + std::string::npos) { + _curUniqueGeom++; + p += 9; + size_t i = 0; + while ((p = _dangling.find("(", p + 1)) != std::string::npos) { + const auto &line = parseLineString(_dangling, p + 1); + if (line.size() == 0) { + if (i == 0) { + IdMapping idm{0, std::numeric_limits::max()}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } - i++; - } - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; + } else { + _linesF.write(reinterpret_cast(&_linePointsFSize), + sizeof(size_t)); + _linesFSize++; + insertLine(line, true); + + IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } - } else if (isGeom && (p = _dangling.rfind("\"MULTIPOLYGON(", 0)) != - std::string::npos) { - _curUniqueGeom++; - p += 13; - size_t i = 0; - while ((p = _dangling.find("(", p + 1)) != std::string::npos) { - if (_dangling[p + 1] == '(') p++; - const auto& line = parseLineString(_dangling, p + 1); - if (line.size() == 0) { - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; - _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), - sizeof(IdMapping)); - _qidToIdFSize++; - } - } else { - _linesF.write(reinterpret_cast(&_linePointsFSize), - sizeof(size_t)); - _linesFSize++; - insertLine(line, true); - - IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; + i++; + } + if (i == 0) { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } + } else if (isGeom && (p = _dangling.rfind("\"MULTIPOLYGON(", 0)) != + std::string::npos) { + _curUniqueGeom++; + p += 13; + size_t i = 0; + while ((p = _dangling.find("(", p + 1)) != std::string::npos) { + if (_dangling[p + 1] == '(') + p++; + const auto &line = parseLineString(_dangling, p + 1); + if (line.size() == 0) { + if (i == 0) { + IdMapping idm{0, std::numeric_limits::max()}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } - i++; - } - if (i == 0) { - IdMapping idm{0, std::numeric_limits::max()}; + } else { + _linesF.write(reinterpret_cast(&_linePointsFSize), + sizeof(size_t)); + _linesFSize++; + insertLine(line, true); + + IdMapping idm{i == 0 ? 0 : 1, I_OFFSET + _linesFSize - 1}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } - } else { + i++; + } + if (i == 0) { IdMapping idm{0, std::numeric_limits::max()}; _lastQidToId = idm; - _qidToIdF.write(reinterpret_cast(&idm), + _qidToIdF.write(reinterpret_cast(&idm), sizeof(IdMapping)); _qidToIdFSize++; } + } else { + IdMapping idm{0, std::numeric_limits::max()}; + _lastQidToId = idm; + _qidToIdF.write(reinterpret_cast(&idm), + sizeof(IdMapping)); + _qidToIdFSize++; + } - if (*c == '\n') { - _curRow++; - if (_curRow % 1000000 == 0) { - LOG(INFO) << "[GEOMCACHE] " - << "@ row " << _curRow << " (" << std::fixed - << std::setprecision(2) << getLoadStatusPercent() - << "%, " << _pointsFSize << " points, " << _linesFSize - << " (open) polygons)"; - } - _prev = _dangling; - _dangling.clear(); - c++; - continue; - } else { - _prev = _dangling; - _dangling.clear(); - c++; - continue; + if (*c == '\n') { + _curRow++; + if (_curRow % 1000000 == 0) { + LOG(INFO) << "[GEOMCACHE] " + << "@ row " << _curRow << " (" << std::fixed + << std::setprecision(2) << getLoadStatusPercent() << "%, " + << _pointsFSize << " points, " << _linesFSize + << " (open) polygons)"; } + _prev = _dangling; + _dangling.clear(); + c++; + continue; + } else { + _prev = _dangling; + _dangling.clear(); + c++; + continue; } + } - _dangling += toupper(*c); - c++; + _dangling += toupper(*c); + c++; - break; - default: - break; + break; + default: + break; } } } @@ -361,32 +392,31 @@ double GeomCache::getLoadStatusPercent(bool total) { double parseIdsPercent = 5.0; double totalPercent = 0.0; switch (_loadStatusStage) { - case _LoadStatusStages::Parse: - totalPercent = std::atomic(_curRow) / - static_cast(_totalSize) * parsePercent; - break; - case _LoadStatusStages::ParseIds: - totalPercent = parsePercent; - totalPercent += std::atomic(_curRow) / - static_cast(_totalSize) * parseIdsPercent; - break; + case _LoadStatusStages::Parse: + totalPercent = std::atomic(_curRow) / + static_cast(_totalSize) * parsePercent; + break; + case _LoadStatusStages::ParseIds: + totalPercent = parsePercent; + totalPercent += std::atomic(_curRow) / + static_cast(_totalSize) * parseIdsPercent; + break; } return totalPercent; } // _____________________________________________________________________________ -int GeomCache::getLoadStatusStage() { - return _loadStatusStage; -} +int GeomCache::getLoadStatusStage() { return _loadStatusStage; } // _____________________________________________________________________________ -void GeomCache::parseIds(const char* c, size_t size) { +void GeomCache::parseIds(const char *c, size_t size) { _loadStatusStage = _LoadStatusStages::ParseIds; size_t lastQid = -1; for (size_t i = 0; i < size; i++) { - if (_raw.size() < 10000) _raw.push_back(c[i]); + if (_raw.size() < 10000) + _raw.push_back(c[i]); _curId.bytes[_curByte] = c[i]; _curByte = (_curByte + 1) % 8; @@ -394,8 +424,8 @@ void GeomCache::parseIds(const char* c, size_t size) { if (_curRow % 1000000 == 0) { LOG(INFO) << "[GEOMCACHE] " << "@ row " << _curRow << " (" << std::fixed - << std::setprecision(2) << getLoadStatusPercent() - << "%, " << _pointsFSize << " points, " << _linesFSize + << std::setprecision(2) << getLoadStatusPercent() << "%, " + << _pointsFSize << " points, " << _linesFSize << " (open) polygons)"; } @@ -416,7 +446,8 @@ void GeomCache::parseIds(const char* c, size_t size) { _qidToId[_curRow].qid = _curId.val; } lastQid = _curId.val; - if (_curId.val > _maxQid) _maxQid = _curId.val; + if (_curId.val > _maxQid) + _maxQid = _curId.val; } else { LOG(WARN) << "The results for the binary IDs are out of sync."; LOG(WARN) << "_curRow: " << _curRow @@ -439,11 +470,14 @@ void GeomCache::parseIds(const char* c, size_t size) { } // _____________________________________________________________________________ -void GeomCache::parseCount(const char* c, size_t size) { +void GeomCache::parseCount(const char *c, size_t size) { for (size_t i = 0; i < size; i++) { - if (_raw.size() < 10000) _raw.push_back(c[i]); - if (c[i] == '\n') _state = IN_ROW; - if (_state == IN_ROW) _dangling += c[i]; + if (_raw.size() < 10000) + _raw.push_back(c[i]); + if (c[i] == '\n') + _state = IN_ROW; + if (_state == IN_ROW) + _dangling += c[i]; } } @@ -459,7 +493,11 @@ size_t GeomCache::requestSize() { char errbuf[CURL_ERROR_SIZE]; if (_curl) { - auto qUrl = queryUrl(getCountQuery(_backendUrl), 0, 1); + const std::string &countQuery = getCountQuery(_backendUrl); + LOG(INFO) << "[GEOMCACHE] Count query to obtain the number of geometries:" + << std::endl + << countQuery; + auto qUrl = queryUrl(countQuery, 0, 1); curl_easy_setopt(_curl, CURLOPT_URL, qUrl.c_str()); curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, GeomCache::writeCbCount); curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this); @@ -469,7 +507,7 @@ size_t GeomCache::requestSize() { curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, 0); // set headers - struct curl_slist* headers = 0; + struct curl_slist *headers = 0; headers = curl_slist_append(headers, "Accept: text/tab-separated-values"); curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, headers); @@ -491,7 +529,8 @@ size_t GeomCache::requestSize() { throw std::runtime_error(ss.str()); } - if (_exceptionPtr) std::rethrow_exception(_exceptionPtr); + if (_exceptionPtr) + std::rethrow_exception(_exceptionPtr); } else { LOG(ERROR) << "[GEOMCACHE] Failed to perform curl request."; return -1; @@ -535,7 +574,7 @@ void GeomCache::requestPart(size_t offset) { curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, 0); // set headers - struct curl_slist* headers = 0; + struct curl_slist *headers = 0; headers = curl_slist_append(headers, "Accept: text/tab-separated-values"); curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, headers); @@ -557,7 +596,8 @@ void GeomCache::requestPart(size_t offset) { throw std::runtime_error(ss.str()); } - if (_exceptionPtr) std::rethrow_exception(_exceptionPtr); + if (_exceptionPtr) + std::rethrow_exception(_exceptionPtr); } else { LOG(ERROR) << "[GEOMCACHE] Failed to perform curl request."; return; @@ -594,25 +634,29 @@ void GeomCache::request() { _raw.clear(); _raw.reserve(100000); - char* pointsFName = strdup("pointsXXXXXX"); + char *pointsFName = strdup("pointsXXXXXX"); int i = mkstemp(pointsFName); - if (i == -1) throw std::runtime_error("Could not create temporary file"); + if (i == -1) + throw std::runtime_error("Could not create temporary file"); _pointsF.open(pointsFName, std::ios::out | std::ios::in | std::ios::binary); - char* linePointsFName = strdup("linepointsXXXXXX"); + char *linePointsFName = strdup("linepointsXXXXXX"); i = mkstemp(linePointsFName); - if (i == -1) throw std::runtime_error("Could not create temporary file"); + if (i == -1) + throw std::runtime_error("Could not create temporary file"); _linePointsF.open(linePointsFName, std::ios::out | std::ios::in | std::ios::binary); - char* linesFName = strdup("linesXXXXXX"); + char *linesFName = strdup("linesXXXXXX"); i = mkstemp(linesFName); - if (i == -1) throw std::runtime_error("Could not create temporary file"); + if (i == -1) + throw std::runtime_error("Could not create temporary file"); _linesF.open(linesFName, std::ios::out | std::ios::in | std::ios::binary); - char* qidToIdFName = strdup("qidtoidXXXXXX"); + char *qidToIdFName = strdup("qidtoidXXXXXX"); i = mkstemp(qidToIdFName); - if (i == -1) throw std::runtime_error("Could not create temporary file"); + if (i == -1) + throw std::runtime_error("Could not create temporary file"); _qidToIdF.open(qidToIdFName, std::ios::out | std::ios::in | std::ios::binary); // immediately unlink @@ -645,31 +689,32 @@ void GeomCache::request() { lastNum = _curRow - offset; } - if (i == -1) throw std::runtime_error("Could not create temporary file"); + if (i == -1) + throw std::runtime_error("Could not create temporary file"); LOG(INFO) << "[GEOMCACHE] Building vectors..."; _points.resize(_pointsFSize); _pointsF.seekg(0); - _pointsF.read(reinterpret_cast(&_points[0]), + _pointsF.read(reinterpret_cast(&_points[0]), sizeof(util::geo::FPoint) * _pointsFSize); _pointsF.close(); _linePoints.resize(_linePointsFSize); _linePointsF.seekg(0); - _linePointsF.read(reinterpret_cast(&_linePoints[0]), + _linePointsF.read(reinterpret_cast(&_linePoints[0]), sizeof(util::geo::Point) * _linePointsFSize); _linePointsF.close(); _lines.resize(_linesFSize); _linesF.seekg(0); - _linesF.read(reinterpret_cast(&_lines[0]), + _linesF.read(reinterpret_cast(&_lines[0]), sizeof(size_t) * _linesFSize); _linesF.close(); _qidToId.resize(_qidToIdFSize); _qidToIdF.seekg(0); - _qidToIdF.read(reinterpret_cast(&_qidToId[0]), + _qidToIdF.read(reinterpret_cast(&_qidToId[0]), sizeof(IdMapping) * _qidToIdFSize); _qidToIdF.close(); @@ -700,7 +745,7 @@ void GeomCache::requestIds() { curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, 0); // set headers - struct curl_slist* headers = 0; + struct curl_slist *headers = 0; headers = curl_slist_append(headers, "Accept: application/octet-stream"); curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, headers); @@ -721,7 +766,8 @@ void GeomCache::requestIds() { throw std::runtime_error(ss.str()); } - if (_exceptionPtr) std::rethrow_exception(_exceptionPtr); + if (_exceptionPtr) + std::rethrow_exception(_exceptionPtr); } else { LOG(ERROR) << "[GEOMCACHE] Failed to perform curl request."; } @@ -759,27 +805,35 @@ std::string GeomCache::queryUrl(std::string query, size_t offset, } // _____________________________________________________________________________ -bool GeomCache::pointValid(const FPoint& p) { - if (p.getY() > std::numeric_limits::max()) return false; - if (p.getY() < std::numeric_limits::lowest()) return false; - if (p.getX() > std::numeric_limits::max()) return false; - if (p.getX() < std::numeric_limits::lowest()) return false; +bool GeomCache::pointValid(const FPoint &p) { + if (p.getY() > std::numeric_limits::max()) + return false; + if (p.getY() < std::numeric_limits::lowest()) + return false; + if (p.getX() > std::numeric_limits::max()) + return false; + if (p.getX() < std::numeric_limits::lowest()) + return false; return true; } // _____________________________________________________________________________ -bool GeomCache::pointValid(const DPoint& p) { - if (p.getY() > std::numeric_limits::max()) return false; - if (p.getY() < std::numeric_limits::lowest()) return false; - if (p.getX() > std::numeric_limits::max()) return false; - if (p.getX() < std::numeric_limits::lowest()) return false; +bool GeomCache::pointValid(const DPoint &p) { + if (p.getY() > std::numeric_limits::max()) + return false; + if (p.getY() < std::numeric_limits::lowest()) + return false; + if (p.getX() > std::numeric_limits::max()) + return false; + if (p.getX() < std::numeric_limits::lowest()) + return false; return true; } // _____________________________________________________________________________ -util::geo::DLine GeomCache::parseLineString(const std::string& a, +util::geo::DLine GeomCache::parseLineString(const std::string &a, size_t p) const { util::geo::DLine line; line.reserve(2); @@ -787,18 +841,20 @@ util::geo::DLine GeomCache::parseLineString(const std::string& a, assert(end); while (true) { - auto point = latLngToWebMerc(DPoint( - util::atof(a.c_str() + p, 10), - util::atof( - static_cast(memchr(a.c_str() + p, ' ', a.size() - p)) + - 1, - 10))); + auto point = latLngToWebMerc( + DPoint(util::atof(a.c_str() + p, 10), + util::atof(static_cast( + memchr(a.c_str() + p, ' ', a.size() - p)) + + 1, + 10))); - if (pointValid(point)) line.push_back(point); + if (pointValid(point)) + line.push_back(point); auto n = memchr(a.c_str() + p, ',', a.size() - p); - if (!n || n > end) break; - p = static_cast(n) - a.c_str() + 1; + if (!n || n > end) + break; + p = static_cast(n) - a.c_str() + 1; } // the 200 is the THRESHOLD from Server.cpp @@ -807,20 +863,20 @@ util::geo::DLine GeomCache::parseLineString(const std::string& a, } // _____________________________________________________________________________ -util::geo::FPoint GeomCache::parsePoint(const std::string& a, size_t p) const { - auto point = latLngToWebMerc(FPoint( - util::atof(a.c_str() + p, 10), - util::atof( - static_cast(memchr(a.c_str() + p, ' ', a.size() - p)) + - 1, - 10))); +util::geo::FPoint GeomCache::parsePoint(const std::string &a, size_t p) const { + auto point = + latLngToWebMerc(FPoint(util::atof(a.c_str() + p, 10), + util::atof(static_cast(memchr( + a.c_str() + p, ' ', a.size() - p)) + + 1, + 10))); return point; } // _____________________________________________________________________________ std::pair>, size_t> -GeomCache::getRelObjects(const std::vector& ids) const { +GeomCache::getRelObjects(const std::vector &ids) const { // (geom id, result row) std::vector> ret; @@ -838,7 +894,8 @@ GeomCache::getRelObjects(const std::vector& ids) const { size_t prefJ = j; while (j < _qidToId.size() && ids[i].qid == _qidToId[j].qid) { - if (ret.size() == 0 || ret.back().second != ids[i].id) numObjects++; + if (ret.size() == 0 || ret.back().second != ids[i].id) + numObjects++; ret.push_back({_qidToId[j].id, ids[i].id}); j++; } @@ -874,17 +931,17 @@ GeomCache::getRelObjects(const std::vector& ids) const { } // _____________________________________________________________________________ -void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { +void GeomCache::insertLine(const util::geo::DLine &l, bool isArea) { // we also add the line's bounding box here to also // compress that - const auto& bbox = util::geo::getBoundingBox(l); + const auto &bbox = util::geo::getBoundingBox(l); int16_t mainX = (bbox.getLowerLeft().getX() * 10.0) / M_COORD_GRANULARITY; int16_t mainY = (bbox.getLowerLeft().getY() * 10.0) / M_COORD_GRANULARITY; if (mainX != 0 || mainY != 0) { util::geo::Point p{mCoord(mainX), mCoord(mainY)}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; } @@ -896,7 +953,7 @@ void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { (bbox.getLowerLeft().getY() * 10.0) - mainY * M_COORD_GRANULARITY; util::geo::Point p{minorXLoc, minorYLoc}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; @@ -912,17 +969,17 @@ void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { mainY = mainYLoc; util::geo::Point p{mCoord(mainX), mCoord(mainY)}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; } p = util::geo::Point{minorXLoc, minorYLoc}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; // add line points - for (const auto& p : l) { + for (const auto &p : l) { mainXLoc = (p.getX() * 10.0) / M_COORD_GRANULARITY; mainYLoc = (p.getY() * 10.0) / M_COORD_GRANULARITY; @@ -931,7 +988,7 @@ void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { mainY = mainYLoc; util::geo::Point p{mCoord(mainX), mCoord(mainY)}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; } @@ -940,7 +997,7 @@ void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { int16_t minorYLoc = (p.getY() * 10.0) - mainYLoc * M_COORD_GRANULARITY; util::geo::Point pp{minorXLoc, minorYLoc}; - _linePointsF.write(reinterpret_cast(&pp), + _linePointsF.write(reinterpret_cast(&pp), sizeof(util::geo::Point)); _linePointsFSize++; } @@ -949,7 +1006,7 @@ void GeomCache::insertLine(const util::geo::DLine& l, bool isArea) { // other types) if (isArea) { util::geo::Point p{mCoord(0), mCoord(0)}; - _linePointsF.write(reinterpret_cast(&p), + _linePointsF.write(reinterpret_cast(&p), sizeof(util::geo::Point)); _linePointsFSize++; } @@ -966,7 +1023,7 @@ util::geo::DBox GeomCache::getLineBBox(size_t lid) const { double mainY = 0; for (size_t i = start; i < start + 4; i++) { // extract real geom - const auto& cur = _linePoints[i]; + const auto &cur = _linePoints[i]; if (isMCoord(cur.getX())) { mainX = rmCoord(cur.getX()); @@ -990,7 +1047,7 @@ util::geo::DBox GeomCache::getLineBBox(size_t lid) const { } // _____________________________________________________________________________ -std::string GeomCache::indexHashFromDisk(const std::string& fname) { +std::string GeomCache::indexHashFromDisk(const std::string &fname) { std::ifstream f(fname, std::ios::binary); char tmp[100]; f.read(tmp, 100); @@ -1000,7 +1057,7 @@ std::string GeomCache::indexHashFromDisk(const std::string& fname) { } // _____________________________________________________________________________ -void GeomCache::fromDisk(const std::string& fname) { +void GeomCache::fromDisk(const std::string &fname) { _points.clear(); _linePoints.clear(); _lines.clear(); @@ -1015,29 +1072,29 @@ void GeomCache::fromDisk(const std::string& fname) { _indexHash = util::trim(tmp); size_t numPoints; - f.read(reinterpret_cast(&numPoints), sizeof(size_t)); + f.read(reinterpret_cast(&numPoints), sizeof(size_t)); _points.resize(numPoints); - f.read(reinterpret_cast(&_points[0]), + f.read(reinterpret_cast(&_points[0]), sizeof(util::geo::FPoint) * numPoints); - f.read(reinterpret_cast(&numPoints), sizeof(size_t)); + f.read(reinterpret_cast(&numPoints), sizeof(size_t)); _linePoints.resize(numPoints); - f.read(reinterpret_cast(&_linePoints[0]), + f.read(reinterpret_cast(&_linePoints[0]), sizeof(util::geo::Point) * numPoints); - f.read(reinterpret_cast(&numPoints), sizeof(size_t)); + f.read(reinterpret_cast(&numPoints), sizeof(size_t)); _lines.resize(numPoints); - f.read(reinterpret_cast(&_lines[0]), sizeof(size_t) * numPoints); + f.read(reinterpret_cast(&_lines[0]), sizeof(size_t) * numPoints); - f.read(reinterpret_cast(&numPoints), sizeof(size_t)); + f.read(reinterpret_cast(&numPoints), sizeof(size_t)); _qidToId.resize(numPoints); - f.read(reinterpret_cast(&_qidToId[0]), sizeof(IdMapping) * numPoints); + f.read(reinterpret_cast(&_qidToId[0]), sizeof(IdMapping) * numPoints); f.close(); } // _____________________________________________________________________________ -void GeomCache::serializeToDisk(const std::string& fname) const { +void GeomCache::serializeToDisk(const std::string &fname) const { std::ofstream f; f.open(fname); @@ -1049,22 +1106,23 @@ void GeomCache::serializeToDisk(const std::string& fname) const { f.write(h.c_str(), 100); size_t num = _points.size(); - f.write(reinterpret_cast(&num), sizeof(size_t)); - f.write(reinterpret_cast(&_points[0]), + f.write(reinterpret_cast(&num), sizeof(size_t)); + f.write(reinterpret_cast(&_points[0]), sizeof(util::geo::FPoint) * num); num = _linePoints.size(); - f.write(reinterpret_cast(&num), sizeof(size_t)); - f.write(reinterpret_cast(&_linePoints[0]), + f.write(reinterpret_cast(&num), sizeof(size_t)); + f.write(reinterpret_cast(&_linePoints[0]), sizeof(util::geo::Point) * num); num = _lines.size(); - f.write(reinterpret_cast(&num), sizeof(size_t)); - f.write(reinterpret_cast(&_lines[0]), sizeof(size_t) * num); + f.write(reinterpret_cast(&num), sizeof(size_t)); + f.write(reinterpret_cast(&_lines[0]), sizeof(size_t) * num); num = _qidToId.size(); - f.write(reinterpret_cast(&num), sizeof(size_t)); - f.write(reinterpret_cast(&_qidToId[0]), sizeof(IdMapping) * num); + f.write(reinterpret_cast(&num), sizeof(size_t)); + f.write(reinterpret_cast(&_qidToId[0]), + sizeof(IdMapping) * num); f.close(); } @@ -1105,7 +1163,7 @@ std::string GeomCache::requestIndexHash() { if (httpCode != 200) { LOG(WARN) << "QLever backend returned status code " << httpCode - << " for index hash."; + << " for index hash."; return ""; } @@ -1117,12 +1175,13 @@ std::string GeomCache::requestIndexHash() { } // _____________________________________________________________________________ -void GeomCache::load(const std::string& cacheDir) { +void GeomCache::load(const std::string &cacheDir) { std::lock_guard guard(_m); if (_ready) { auto indexHash = requestIndexHash(); - if (_indexHash == indexHash) return; + if (_indexHash == indexHash) + return; LOG(INFO) << "Loaded index hash (" << _indexHash << ") and remote index hash (" << indexHash << ") dont match."; _ready = false;