diff --git a/docs/software/ledger_query_examples.md b/docs/software/ledger_query_examples.md index 777c8c8f99..bd7609bf19 100644 --- a/docs/software/ledger_query_examples.md +++ b/docs/software/ledger_query_examples.md @@ -12,63 +12,71 @@ ledger to JSON files for further analysis. * Dump entries modified in the 1000 most recent ledgers: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --last-ledgers 1000` + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --last-ledgers 1000` * Dump 1000 recently modified ledger entries (not necessarily the *most* recently modified): - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --limit 1000` + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --limit 1000` * Dump all the ledger entries with provided account ID or trustline ID: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --filter-query "data.account.accountID == 'GDNG6SVZAJHCFCH65R7SQDLGVR6FDAR67M7YDHEESXKRRZYBWVF4BEC5' || data.trustLine.accountID == 'GDNG6SVZAJHCFCH65R7SQDLGVR6FDAR67M7YDHEESXKRRZYBWVF4BEC5'" ` * Dump 1000 account entries that have non-empty `inflationDest` field: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --filter-query "data.account.inflationDest != NULL" --limit 1000` * Dump all the offer entries that trade lumens for any asset with code `'AABBG'` and have been modified within the last 1000 ledgers: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --filter-query "data.offer.selling == 'NATIVE' && data.offer.buying.assetCode == 'AABBG'" --last-ledgers 1000` * Dump 100 trustline entries that have buying 
liabilities lower than selling liabilities: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.json --filter-query "data.trustLine.ext.v1.liabilities.buying < data.trustLine.ext.v1.liabilities.selling" --limit 100` -* Dump 100 account entries that fullfill a more complex filter (this just demonstrates +* Dump 100 account entries that fulfill a more complex filter (this just demonstrates that filter supports logical expressions): - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q7.json --filter-query "(data.account.balance < 100000000 || data.account.balance >= 2000000000) && data.account.numSubEntries > 2" --limit 100` +* Output 10 entries larger than 200 bytes: + + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg + --output-file q8.json --filter-query "entry_size() > 200" --limit 10` + ## Aggregating ledger entries The following examples demonstrate how to aggregate parts of the ledger into CSV tables. 
* Find the count of every ledger entry type starting from the certain ledger seq: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.csv --filter-query "lastModifiedLedgerSeq >= 37872608" --group-by "data.type" --agg "count()"` * Dump the order book stats for the offers that have been modified during the last 100000 ledgers: - `stellar-core.exe dump-ledger --conf ../stellar-core_pubnet.cfg + `./stellar-core dump-ledger --conf ../stellar-core_pubnet.cfg --output-file q.csv --filter-query "data.type == 'OFFER'" --group-by "data.offer.selling, data.offer.selling.assetCode, data.offer.selling.issuer, data.offer.buying, data.offer.buying.assetCode, data.offer.buying.issuer" --agg "sum(data.offer.amount), avg(data.offer.amount), count()" --last-ledgers 100000` - \ No newline at end of file + +* Find the entry size distribution: + + `./stellar-core dump-ledger --output-file entry_stats.json --group-by "data.type" --agg "sum(entry_size()), avg(entry_size())"` \ No newline at end of file diff --git a/src/bucket/BucketManager.h b/src/bucket/BucketManager.h index abdb91db8d..94e9580614 100644 --- a/src/bucket/BucketManager.h +++ b/src/bucket/BucketManager.h @@ -373,7 +373,8 @@ class BucketManager : NonMovableOrCopyable virtual void visitLedgerEntries( HistoryArchiveState const& has, std::optional minLedger, std::function const& filterEntry, - std::function const& acceptEntry) = 0; + std::function const& acceptEntry, + bool includeAllStates) = 0; // Schedule a Work class that verifies the hashes of all referenced buckets // on background threads. 
diff --git a/src/bucket/BucketManagerImpl.cpp b/src/bucket/BucketManagerImpl.cpp index bf9afee0fd..c5d89dede1 100644 --- a/src/bucket/BucketManagerImpl.cpp +++ b/src/bucket/BucketManagerImpl.cpp @@ -1319,7 +1319,7 @@ BucketManagerImpl::mergeBuckets(HistoryArchiveState const& has) } static bool -visitEntriesInBucket(std::shared_ptr b, std::string const& name, +visitLiveEntriesInBucket(std::shared_ptr b, std::string const& name, std::optional minLedger, std::function const& filterEntry, std::function const& acceptEntry, @@ -1381,11 +1381,67 @@ visitEntriesInBucket(std::shared_ptr b, std::string const& name, return !stopIteration; } +static bool +visitAllEntriesInBucket( + std::shared_ptr b, std::string const& name, + std::optional minLedger, + std::function const& filterEntry, + std::function const& acceptEntry) +{ + ZoneScoped; + + using namespace std::chrono; + medida::Timer timer; + + bool stopIteration = false; + timer.Time([&]() { + for (BucketInputIterator in(b); in; ++in) + { + BucketEntry const& e = *in; + if (e.type() == LIVEENTRY || e.type() == INITENTRY) + { + auto const& liveEntry = e.liveEntry(); + if (minLedger && liveEntry.lastModifiedLedgerSeq < *minLedger) + { + stopIteration = true; + continue; + } + if (filterEntry(e.liveEntry())) + { + if (!acceptEntry(e.liveEntry())) + { + stopIteration = true; + break; + } + } + } + else + { + if (e.type() != DEADENTRY) + { + std::string err = "Malformed bucket: unexpected " + "non-INIT/LIVE/DEAD entry."; + CLOG_ERROR(Bucket, "{}", err); + throw std::runtime_error(err); + } + } + } + }); + nanoseconds ns = + timer.duration_unit() * static_cast(timer.max()); + milliseconds ms = duration_cast(ns); + size_t bytesPerSec = (b->getSize() * 1000 / (1 + ms.count())); + CLOG_INFO(Bucket, "Processed {}-byte bucket file '{}' in {} ({}/s)", + b->getSize(), name, ms, formatSize(bytesPerSec)); + return !stopIteration; +} + void BucketManagerImpl::visitLedgerEntries( HistoryArchiveState const& has, std::optional minLedger, 
std::function const& filterEntry, - std::function const& acceptEntry) + std::function const& acceptEntry, + bool includeAllStates) { ZoneScoped; @@ -1413,8 +1469,14 @@ BucketManagerImpl::visitLedgerEntries( throw std::runtime_error(std::string("missing bucket: ") + binToHex(pair.first)); } - if (!visitEntriesInBucket(b, pair.second, minLedger, filterEntry, - acceptEntry, deletedEntries)) + bool continueIteration = + includeAllStates + ? visitAllEntriesInBucket(b, pair.second, minLedger, + filterEntry, acceptEntry) + : visitLiveEntriesInBucket(b, pair.second, minLedger, + filterEntry, acceptEntry, + deletedEntries); + if (!continueIteration) { break; } diff --git a/src/bucket/BucketManagerImpl.h b/src/bucket/BucketManagerImpl.h index 0643171b0b..a347692ca2 100644 --- a/src/bucket/BucketManagerImpl.h +++ b/src/bucket/BucketManagerImpl.h @@ -182,7 +182,8 @@ class BucketManagerImpl : public BucketManager void visitLedgerEntries( HistoryArchiveState const& has, std::optional minLedger, std::function const& filterEntry, - std::function const& acceptEntry) override; + std::function const& acceptEntry, + bool includeAllStates) override; std::shared_ptr scheduleVerifyReferencedBucketsWork() override; diff --git a/src/ledger/LedgerTxn.cpp b/src/ledger/LedgerTxn.cpp index b867859c3b..a450613b5f 100644 --- a/src/ledger/LedgerTxn.cpp +++ b/src/ledger/LedgerTxn.cpp @@ -25,9 +25,11 @@ #include "xdr/Stellar-ledger-entries.h" #include "xdrpp/marshal.h" #include -#include #include +#include +#include + namespace stellar { diff --git a/src/main/ApplicationUtils.cpp b/src/main/ApplicationUtils.cpp index 7338bbbac5..5f33fce2a5 100644 --- a/src/main/ApplicationUtils.cpp +++ b/src/main/ApplicationUtils.cpp @@ -55,7 +55,7 @@ writeLedgerAggregationTable( std::vector keyFields; if (groupByExtractor) { - keyFields = groupByExtractor->getFieldNames(); + keyFields = groupByExtractor->getColumnNames(); for (auto const& keyField : keyFields) { ofs << keyField << ","; @@ -742,7 +742,7 @@ 
dumpLedger(Config cfg, std::string const& outputFile, std::optional filterQuery, std::optional lastModifiedLedgerCount, std::optional limit, std::optional groupBy, - std::optional aggregate) + std::optional aggregate, bool includeAllStates) { if (groupBy && !aggregate) { @@ -820,7 +820,8 @@ dumpLedger(Config cfg, std::string const& outputFile, } ++entryCount; return !limit || entryCount < *limit; - }); + }, + includeAllStates); } catch (xdrquery::XDRQueryError& e) { diff --git a/src/main/ApplicationUtils.h b/src/main/ApplicationUtils.h index 3497d6f471..30d2cb0fed 100644 --- a/src/main/ApplicationUtils.h +++ b/src/main/ApplicationUtils.h @@ -34,7 +34,7 @@ int dumpLedger(Config cfg, std::string const& outputFile, std::optional lastModifiedLedgerCount, std::optional limit, std::optional groupBy, - std::optional aggregate); + std::optional aggregate, bool includeAllStates); void showOfflineInfo(Config cfg, bool verbose); int reportLastHistoryCheckpoint(Config cfg, std::string const& outputFile); diff --git a/src/main/CommandLine.cpp b/src/main/CommandLine.cpp index 67dc6cf239..24d1eb9cc5 100644 --- a/src/main/CommandLine.cpp +++ b/src/main/CommandLine.cpp @@ -458,6 +458,13 @@ limitParser(std::optional& limit) "process only this many recent ledger entries (not *most* recent)"); } +clara::Opt +includeAllStatesParser(bool& include) +{ + return clara::Opt{include}["--include-all-states"]( + "include all non-dead states of the entry into query results"); +} + int runWithHelp(CommandLineArgs const& args, std::vector parsers, std::function f) @@ -1188,19 +1195,20 @@ runDumpLedger(CommandLineArgs const& args) std::optional limit; std::optional groupBy; std::optional aggregate; - return runWithHelp(args, - {configurationParser(configOption), - outputFileParser(outputFile).required(), - filterQueryParser(filterQuery), - lastModifiedLedgerCountParser(lastModifiedLedgerCount), - limitParser(limit), groupByParser(groupBy), - aggregateParser(aggregate)}, - [&] { - return 
dumpLedger(configOption.getConfig(), - outputFile, filterQuery, - lastModifiedLedgerCount, limit, - groupBy, aggregate); - }); + bool includeAllStates = false; + return runWithHelp( + args, + {configurationParser(configOption), + outputFileParser(outputFile).required(), + filterQueryParser(filterQuery), + lastModifiedLedgerCountParser(lastModifiedLedgerCount), + limitParser(limit), groupByParser(groupBy), aggregateParser(aggregate), + includeAllStatesParser(includeAllStates)}, + [&] { + return dumpLedger(configOption.getConfig(), outputFile, filterQuery, + lastModifiedLedgerCount, limit, groupBy, + aggregate, includeAllStates); + }); } int diff --git a/src/util/xdrquery/XDRQuery.cpp b/src/util/xdrquery/XDRQuery.cpp index d378aee824..0a60e196ff 100644 --- a/src/util/xdrquery/XDRQuery.cpp +++ b/src/util/xdrquery/XDRQuery.cpp @@ -15,9 +15,9 @@ XDRFieldExtractor::XDRFieldExtractor(std::string const& query) : mQuery(query) } std::vector -XDRFieldExtractor::getFieldNames() const +XDRFieldExtractor::getColumnNames() const { - return mFieldList->getFieldNames(); + return mFieldList->getColumnNames(); } XDRAccumulator::XDRAccumulator(std::string const& query) : mQuery(query) diff --git a/src/util/xdrquery/XDRQuery.h b/src/util/xdrquery/XDRQuery.h index 21e1dd3bda..70c5c3a457 100644 --- a/src/util/xdrquery/XDRQuery.h +++ b/src/util/xdrquery/XDRQuery.h @@ -14,17 +14,45 @@ namespace xdrquery { +// Concrete implementation of DynamicXDRGetter for a given XDR message type T. 
template -FieldResolver -createFieldResolver(T const& xdrMessage, bool validate) +class TypedDynamicXDRGetterResolver : public DynamicXDRGetter { - return [&xdrMessage, validate](std::vector const& fieldPath) { - if (validate) + public: + TypedDynamicXDRGetterResolver(T const& xdrMessage, bool validate) + : mXdrMessage(xdrMessage), mValidate(validate) + { + } + + ResultType + getField(std::vector const& fieldPath) const override + { + if (mValidate) { - return getXDRFieldValidated(xdrMessage, fieldPath); + return getXDRFieldValidated(mXdrMessage, fieldPath); } - return getXDRField(xdrMessage, fieldPath); - }; + return getXDRField(mXdrMessage, fieldPath); + } + + uint64_t + getSize() const override + { + return xdr::xdr_size(mXdrMessage); + } + + ~TypedDynamicXDRGetterResolver() override = default; + + private: + T const& mXdrMessage; + bool mValidate; +}; + +template +std::unique_ptr +createXDRGetter(T const& xdrMessage, bool validate) +{ + return std::make_unique>(xdrMessage, + validate); } // Helper to match multiple XDR messages of the same type using the provided @@ -57,7 +85,7 @@ class XDRMatcher } mEvalRoot = std::get>(statement); } - return mEvalRoot->evalBool(createFieldResolver(xdrMessage, firstEval)); + return mEvalRoot->evalBool(*createXDRGetter(xdrMessage, firstEval)); } private: @@ -86,23 +114,22 @@ class XDRFieldExtractor { firstEval = true; auto statement = parseXDRQuery(mQuery); - if (!std::holds_alternative>(statement)) + if (!std::holds_alternative>(statement)) { throw XDRQueryError( "The query doesn't evaluate to field list."); } - mFieldList = std::get>(statement); + mFieldList = std::get>(statement); } - return mFieldList->getValues( - createFieldResolver(xdrMessage, firstEval)); + return mFieldList->getValues(*createXDRGetter(xdrMessage, firstEval)); } // Gets names of the fields from the query. 
- std::vector getFieldNames() const; + std::vector getColumnNames() const; private: std::string mQuery; - std::shared_ptr mFieldList; + std::shared_ptr mFieldList; }; // Helper that allows aggregating values of fields in multiple XDR messages @@ -137,7 +164,7 @@ class XDRAccumulator mAccumulatorList = std::get>(statement); } - mAccumulatorList->addEntry(createFieldResolver(xdrMessage, firstEval)); + mAccumulatorList->addEntry(*createXDRGetter(xdrMessage, firstEval)); } // Gets the accumulators with aggregated values of each field. diff --git a/src/util/xdrquery/XDRQueryEval.cpp b/src/util/xdrquery/XDRQueryEval.cpp index db7618ffdc..8be1db7b20 100644 --- a/src/util/xdrquery/XDRQueryEval.cpp +++ b/src/util/xdrquery/XDRQueryEval.cpp @@ -55,7 +55,7 @@ LiteralNode::LiteralNode(LiteralNodeType valueType, std::string const& val) } ResultType -LiteralNode::eval(FieldResolver const& fieldResolver) const +LiteralNode::eval(DynamicXDRGetter const& xdrGetter) const { return mValue; } @@ -67,15 +67,15 @@ LiteralNode::getType() const } void -LiteralNode::resolveIntType(ResultValueType const& fieldValue, - std::vector const& fieldPath) const +LiteralNode::resolveIntType(ResultValueType const& columnValue, + std::string const& columnName) const { - if (std::holds_alternative(fieldValue)) + if (std::holds_alternative(columnValue)) { std::string valueStr = resultToString(*mValue); throw XDRQueryError(fmt::format( - FMT_STRING("String field '{}' is compared with int value: {}."), - fmt::join(fieldPath, "."), valueStr)); + FMT_STRING("String column '{}' is compared with int value: {}."), + columnName, valueStr)); } std::string valueStr = std::get(*mValue); try @@ -111,16 +111,16 @@ LiteralNode::resolveIntType(ResultValueType const& fieldValue, std::in_place_type, std::stoull(valueStr)); else { - throw std::runtime_error("Unexpected field type."); + throw std::runtime_error("Unexpected column type."); } }, - fieldValue); + columnValue); } catch (std::out_of_range&) { throw 
XDRQueryError(fmt::format( - FMT_STRING("Value for field '{}' is out of type range: {}."), - fmt::join(fieldPath, "."), valueStr)); + FMT_STRING("Value for column '{}' is out of type range: {}."), + columnName, valueStr)); } } @@ -130,21 +130,45 @@ FieldNode::FieldNode(std::string const& initField) } ResultType -FieldNode::eval(FieldResolver const& fieldResolver) const +FieldNode::eval(DynamicXDRGetter const& xdrGetter) const { - return fieldResolver(mFieldPath); + return xdrGetter.getField(mFieldPath); } EvalNodeType FieldNode::getType() const { - return EvalNodeType::FIELD; + return EvalNodeType::COLUMN; +} + +std::string +FieldNode::getName() const +{ + return fmt::to_string(fmt::join(mFieldPath, ".")); +} + +ResultType +EntrySizeNode::eval(DynamicXDRGetter const& xdrGetter) const +{ + return xdrGetter.getSize(); +} + +EvalNodeType +EntrySizeNode::getType() const +{ + return EvalNodeType::COLUMN; +} + +std::string +EntrySizeNode::getName() const +{ + return "entry_size"; } ResultType -BoolEvalNode::eval(FieldResolver const& fieldResolver) const +BoolEvalNode::eval(DynamicXDRGetter const& xdrGetter) const { - return evalBool(fieldResolver); + return evalBool(xdrGetter); } BoolOpNode::BoolOpNode(BoolOpNodeType nodeType, @@ -155,16 +179,14 @@ BoolOpNode::BoolOpNode(BoolOpNodeType nodeType, } bool -BoolOpNode::evalBool(FieldResolver const& fieldResolver) const +BoolOpNode::evalBool(DynamicXDRGetter const& xdrGetter) const { switch (mType) { case BoolOpNodeType::AND: - return mLeft->evalBool(fieldResolver) && - mRight->evalBool(fieldResolver); + return mLeft->evalBool(xdrGetter) && mRight->evalBool(xdrGetter); case BoolOpNodeType::OR: - return mLeft->evalBool(fieldResolver) || - mRight->evalBool(fieldResolver); + return mLeft->evalBool(xdrGetter) || mRight->evalBool(xdrGetter); } } @@ -179,9 +201,9 @@ ComparisonNode::ComparisonNode(ComparisonNodeType nodeType, std::shared_ptr right) : mType(nodeType), mLeft(std::move(left)), mRight(std::move(right)) { - // Keep the 
field as the left argument for simplicity of type check during + // Keep the column as the left argument for simplicity of type check during // evaluation. - if (mRight->getType() == EvalNodeType::FIELD) + if (mRight->getType() == EvalNodeType::COLUMN) { std::swap(mLeft, mRight); // Invert the operation as we have swapped operands. @@ -206,10 +228,10 @@ ComparisonNode::ComparisonNode(ComparisonNodeType nodeType, } bool -ComparisonNode::evalBool(FieldResolver const& fieldResolver) const +ComparisonNode::evalBool(DynamicXDRGetter const& xdrGetter) const { auto leftType = mLeft->getType(); - auto leftVal = mLeft->eval(fieldResolver); + auto leftVal = mLeft->eval(xdrGetter); if (!leftVal) { @@ -217,20 +239,20 @@ ComparisonNode::evalBool(FieldResolver const& fieldResolver) const } auto rightType = mRight->getType(); - if (leftType == EvalNodeType::FIELD && rightType == EvalNodeType::LITERAL) + if (leftType == EvalNodeType::COLUMN && rightType == EvalNodeType::LITERAL) { - // Lazily resolve the type of the int literal using the field type. + // Lazily resolve the type of the int literal using the column type. // This allows to correctly check the literal range and simplifies the // comparisons. 
auto* lit = static_cast(mRight.get()); if (lit->mType == LiteralNodeType::INT && std::holds_alternative(*lit->mValue)) { - auto* field = static_cast(mLeft.get()); - lit->resolveIntType(*leftVal, field->mFieldPath); + auto* column = static_cast(mLeft.get()); + lit->resolveIntType(*leftVal, column->getName()); } } - auto rightVal = mRight->eval(fieldResolver); + auto rightVal = mRight->eval(xdrGetter); if (!rightVal) { return false; @@ -297,8 +319,8 @@ Accumulator::Accumulator(AccumulatorType nodeType) } Accumulator::Accumulator(AccumulatorType nodeType, - std::shared_ptr field) - : mType(nodeType), mField(field) + std::shared_ptr column) + : mType(nodeType), mColumn(column) { switch (mType) { @@ -313,13 +335,13 @@ Accumulator::Accumulator(AccumulatorType nodeType, } void -Accumulator::addEntry(FieldResolver const& fieldResolver) +Accumulator::addEntry(DynamicXDRGetter const& xdrGetter) { - ResultType fieldValue; + ResultType columnValue; if (mType != AccumulatorType::COUNT) { - fieldValue = mField->eval(fieldResolver); - if (!fieldValue) + columnValue = mColumn->eval(xdrGetter); + if (!columnValue) { return; } @@ -338,10 +360,10 @@ Accumulator::addEntry(FieldResolver const& fieldResolver) } else { - throw XDRQueryError("Encountered non-aggregatable field."); + throw XDRQueryError("Encountered non-aggregatable column."); } }, - *fieldValue); + *columnValue); } break; case AccumulatorType::SUM: @@ -354,10 +376,10 @@ Accumulator::addEntry(FieldResolver const& fieldResolver) } else { - throw XDRQueryError("Encountered non-aggregatable field."); + throw XDRQueryError("Encountered non-aggregatable column."); } }, - *fieldValue); + *columnValue); break; case AccumulatorType::COUNT: break; @@ -388,11 +410,9 @@ Accumulator::getName() const switch (mType) { case AccumulatorType::AVERAGE: - return fmt::format(FMT_STRING("avg({})"), - fmt::join(mField->mFieldPath, ".")); + return fmt::format(FMT_STRING("avg({})"), mColumn->getName()); case AccumulatorType::SUM: - return 
fmt::format(FMT_STRING("sum({})"), - fmt::join(mField->mFieldPath, ".")); + return fmt::format(FMT_STRING("sum({})"), mColumn->getName()); case AccumulatorType::COUNT: return "count"; } @@ -410,11 +430,11 @@ AccumulatorList::addAccumulator(std::shared_ptr accumulator) } void -AccumulatorList::addEntry(FieldResolver const& fieldResolver) const +AccumulatorList::addEntry(DynamicXDRGetter const& xdrGetter) const { for (auto const& accumulator : mAccumulators) { - accumulator->addEntry(fieldResolver); + accumulator->addEntry(xdrGetter); } } @@ -424,37 +444,37 @@ AccumulatorList::getAccumulators() const return mAccumulators; } -FieldList::FieldList(std::shared_ptr field) +ColumnList::ColumnList(std::shared_ptr column) { - mFields.emplace_back(field); + mColumns.emplace_back(column); } void -FieldList::addField(std::shared_ptr field) +ColumnList::addColumn(std::shared_ptr column) { - mFields.emplace_back(field); + mColumns.emplace_back(column); } std::vector -FieldList::getValues(FieldResolver const& fieldResolver) const +ColumnList::getValues(DynamicXDRGetter const& xdrGetter) const { std::vector res; - res.reserve(mFields.size()); - for (auto const& field : mFields) + res.reserve(mColumns.size()); + for (auto const& column : mColumns) { - res.emplace_back(field->eval(fieldResolver)); + res.emplace_back(column->eval(xdrGetter)); } return res; } std::vector -FieldList::getFieldNames() const +ColumnList::getColumnNames() const { std::vector names; - names.reserve(mFields.size()); - for (auto const& field : mFields) + names.reserve(mColumns.size()); + for (auto const& column : mColumns) { - names.emplace_back(fmt::to_string(fmt::join(field->mFieldPath, "."))); + names.emplace_back(column->getName()); } return names; } diff --git a/src/util/xdrquery/XDRQueryEval.h b/src/util/xdrquery/XDRQueryEval.h index 06ecee37d3..798d5715b9 100644 --- a/src/util/xdrquery/XDRQueryEval.h +++ b/src/util/xdrquery/XDRQueryEval.h @@ -12,8 +12,8 @@ #include #include -// This is a simple engine 
for evaluating boolean expresions containing literals -// and XDR fields. +// This is a simple engine for evaluating boolean expressions containing +// literals and XDR fields. namespace xdrquery { // This type represents an optional XDR field that is not set. @@ -41,16 +41,25 @@ using ResultValueType = std::variant; -// A function that resolves the field path to an actual value. -using FieldResolver = - std::function const&)>; +// An interface for getting information from XDR structs. +struct DynamicXDRGetter +{ + // Gets a field specified by the provided path. + virtual ResultType + getField(std::vector const& fieldPath) const = 0; + + // Gets the serialized XDR size of the entire struct. + virtual uint64_t getSize() const = 0; + + virtual ~DynamicXDRGetter() = default; +}; std::string resultToString(ResultValueType const& result); enum class EvalNodeType { LITERAL, - FIELD, + COLUMN, BOOL_OP, COMPARISON_OP }; @@ -58,7 +67,7 @@ enum class EvalNodeType // Expression node that can be evaluated. struct EvalNode { - virtual ResultType eval(FieldResolver const& fieldResolver) const = 0; + virtual ResultType eval(DynamicXDRGetter const& xdrGetter) const = 0; virtual EvalNodeType getType() const = 0; virtual ~EvalNode() = default; @@ -76,38 +85,57 @@ struct LiteralNode : public EvalNode { LiteralNode(LiteralNodeType valueType, std::string const& val); - ResultType eval(FieldResolver const& fieldResolver) const override; + ResultType eval(DynamicXDRGetter const& xdrGetter) const override; EvalNodeType getType() const override; - // We only resolve integer literals when they're compared to XDR fields and + // We only resolve integer literals when they're compared to columns and // for simplicity do that lazily via calling this function in eval(). Hence // it has to be `const` and `mValue` has to be mutable. 
- void resolveIntType(ResultValueType const& fieldValue, - std::vector const& fieldPath) const; + void resolveIntType(ResultValueType const& columnValue, + std::string const& columnName) const; LiteralNodeType mType; mutable ResultType mValue; }; +// Node representing a 'column': a value inferred from an XDR entry, +// such as a field or the size of the entry. +struct ColumnNode : public EvalNode +{ + virtual std::string getName() const = 0; +}; + // Node representing an XDR field in expression. -struct FieldNode : public EvalNode +struct FieldNode : public ColumnNode { FieldNode(std::string const& initField); - ResultType eval(FieldResolver const& fieldResolver) const override; + ResultType eval(DynamicXDRGetter const& xdrGetter) const override; EvalNodeType getType() const override; + virtual std::string getName() const override; + std::vector mFieldPath; }; +// Node representing the size of an XDR entry in expression. +struct EntrySizeNode : public ColumnNode +{ + ResultType eval(DynamicXDRGetter const& xdrGetter) const override; + + EvalNodeType getType() const override; + + virtual std::string getName() const override; +}; + // `EvalNode` that always has a `bool` evaluation result. 
struct BoolEvalNode : public EvalNode { - ResultType eval(FieldResolver const& fieldResolver) const override; + ResultType eval(DynamicXDRGetter const& xdrGetter) const override; - virtual bool evalBool(FieldResolver const& fieldResolver) const = 0; + virtual bool evalBool(DynamicXDRGetter const& xdrGetter) const = 0; }; enum class BoolOpNodeType @@ -122,7 +150,7 @@ struct BoolOpNode : public BoolEvalNode BoolOpNode(BoolOpNodeType nodeType, std::shared_ptr left, std::shared_ptr right); - bool evalBool(FieldResolver const& fieldResolver) const override; + bool evalBool(DynamicXDRGetter const& xdrGetter) const override; EvalNodeType getType() const override; @@ -148,7 +176,7 @@ struct ComparisonNode : public BoolEvalNode ComparisonNode(ComparisonNodeType nodeType, std::shared_ptr left, std::shared_ptr right); - bool evalBool(FieldResolver const& fieldResolver) const override; + bool evalBool(DynamicXDRGetter const& xdrGetter) const override; EvalNodeType getType() const override; @@ -172,9 +200,9 @@ using AccumulatorResultType = std::variant; struct Accumulator { explicit Accumulator(AccumulatorType nodeType); - Accumulator(AccumulatorType nodeType, std::shared_ptr field); + Accumulator(AccumulatorType nodeType, std::shared_ptr column); - void addEntry(FieldResolver const& fieldResolver); + void addEntry(DynamicXDRGetter const& xdrGetter); AccumulatorResultType getValue() const; std::string getName() const; @@ -182,7 +210,7 @@ struct Accumulator private: AccumulatorType mType; AccumulatorResultType mValue; - std::shared_ptr mField; + std::shared_ptr mColumn; uint64_t mCount = 0; }; @@ -192,7 +220,7 @@ struct AccumulatorList void addAccumulator(std::shared_ptr accumulator); - void addEntry(FieldResolver const& fieldResolver) const; + void addEntry(DynamicXDRGetter const& xdrGetter) const; std::vector> const& getAccumulators() const; @@ -200,20 +228,21 @@ struct AccumulatorList std::vector> mAccumulators; }; -struct FieldList +// Node representing a list of columns. 
+struct ColumnList { - explicit FieldList(std::shared_ptr field); + explicit ColumnList(std::shared_ptr column); - void addField(std::shared_ptr field); + void addColumn(std::shared_ptr column); - std::vector getValues(FieldResolver const& fieldResolver) const; - std::vector getFieldNames() const; + std::vector getValues(DynamicXDRGetter const& xdrGetter) const; + std::vector getColumnNames() const; private: - std::vector> mFields; + std::vector> mColumns; }; using XDRQueryStatement = std::variant, - std::shared_ptr, std::shared_ptr>; + std::shared_ptr, std::shared_ptr>; } // namespace xdrquery diff --git a/src/util/xdrquery/XDRQueryParser.yy b/src/util/xdrquery/XDRQueryParser.yy index 6fe7928e03..84ebca4930 100644 --- a/src/util/xdrquery/XDRQueryParser.yy +++ b/src/util/xdrquery/XDRQueryParser.yy @@ -40,6 +40,7 @@ parseXDRQuery(std::string const& query); %token SUM %token AVG %token COUNT +%token ENTRY_SIZE %token AND "&&" %token OR "||" @@ -65,18 +66,20 @@ parseXDRQuery(std::string const& query); %type > literal operand %type > comparison_expr logic_expr +%type > column %type > field +%type > entry_size %type > accumulator %type > accumulator_list -%type > field_list +%type > column_list %% statement: logic_expr { root = std::move($1); } | accumulator_list { root = std::move($1); } - | field_list { root = std::move($1); } + | column_list { root = std::move($1); } logic_expr: comparison_expr { $$ = std::move($1); } | "(" logic_expr ")" { $$ = std::move($2); } @@ -107,32 +110,36 @@ comparison_expr: operand "==" operand { std::move($1), std::move($3)); } operand: literal { $$ = std::move($1); } - | field { $$ = std::move($1); } + | column { $$ = std::move($1); } literal: INT { $$ = std::make_shared(LiteralNodeType::INT, $1); } | STR { $$ = std::make_shared(LiteralNodeType::STR, $1); } | NULL { $$ = std::make_shared(LiteralNodeType::NULL_LITERAL, ""); } +column: field { $$ = std::move($1); } + | entry_size { $$ = std::move($1); } + +entry_size: ENTRY_SIZE "(" ")" { 
$$ = std::make_shared(); } + field: ID { $$ = std::make_shared($1); } | field "." ID { $1->mFieldPath.push_back($3); $$ = std::move($1); } - accumulator_list: accumulator { $$ = std::make_shared(std::move($1)); } | accumulator_list "," accumulator { $1->addAccumulator($3); $$ = std::move($1); } accumulator: COUNT "(" ")" { $$ = std::make_shared(AccumulatorType::COUNT); } - | SUM "(" field ")" { + | SUM "(" column ")" { $$ = std::make_shared( AccumulatorType::SUM, std::move($3)); } - | AVG "(" field ")" { + | AVG "(" column ")" { $$ = std::make_shared( AccumulatorType::AVERAGE, std::move($3)); } -field_list: field { $$ = std::make_shared(std::move($1)); } - | field_list "," field { $1->addField($3); $$ = std::move($1); } +column_list: column { $$ = std::make_shared(std::move($1)); } + | column_list "," column { $1->addColumn($3); $$ = std::move($1); } %% diff --git a/src/util/xdrquery/XDRQueryScanner.ll b/src/util/xdrquery/XDRQueryScanner.ll index ecceb5391c..0ab4d7da58 100644 --- a/src/util/xdrquery/XDRQueryScanner.ll +++ b/src/util/xdrquery/XDRQueryScanner.ll @@ -37,6 +37,7 @@ NULL { return xdrquery::XDRQueryParser::make_NULL(); } sum { return xdrquery::XDRQueryParser::make_SUM(); } avg { return xdrquery::XDRQueryParser::make_AVG(); } count { return xdrquery::XDRQueryParser::make_COUNT(); } +entry_size { return xdrquery::XDRQueryParser::make_ENTRY_SIZE(); } {IDENTIFIER} { return xdrquery::XDRQueryParser::make_ID(yytext); } {INT} { return xdrquery::XDRQueryParser::make_INT(yytext); } diff --git a/src/util/xdrquery/test/XDRQueryTests.cpp b/src/util/xdrquery/test/XDRQueryTests.cpp index f0c2b0d1c9..f2fc567907 100644 --- a/src/util/xdrquery/test/XDRQueryTests.cpp +++ b/src/util/xdrquery/test/XDRQueryTests.cpp @@ -301,6 +301,7 @@ TEST_CASE("XDR matcher", "[xdrquery]") makeAccountEntry(100), makeAccountEntry(200), makeOfferEntry("foo"), makeOfferEntry("foobar")}; entries[1].data.account().inflationDest.reset(); + // Entry sizes: 192, 156, 128, 136 auto testMatches = 
[&](std::string const& query, std::vector const& expectedMatches) { @@ -344,6 +345,16 @@ TEST_CASE("XDR matcher", "[xdrquery]") testMatches("NULL != data.account.inflationDest", {true, false, false, false}); } + + SECTION("entry size") + { + testMatches("entry_size() == 192", {true, false, false, false}); + testMatches("156 != entry_size()", {true, false, true, true}); + testMatches("entry_size() > 136", {true, true, false, false}); + testMatches("entry_size() < 192", {false, true, true, true}); + testMatches("entry_size() <= 192", {true, true, true, true}); + testMatches("156 >= entry_size()", {false, true, true, true}); + } } SECTION("queries with operators") @@ -368,6 +379,8 @@ TEST_CASE("XDR matcher", "[xdrquery]") testMatches("data.offer.selling.assetCode == 'foo' && data.type != " "'TRUSTLINE'", {false, false, true, false}); + testMatches("data.account.balance >= 100 && entry_size() > 150", + {true, true, false, false}); } SECTION("mixed operators") @@ -432,6 +445,7 @@ TEST_CASE("XDR matcher", "[xdrquery]") REQUIRE_THROWS_AS(runQuery("data.account == 123"), XDRQueryError); REQUIRE_THROWS_AS(runQuery("data.account.balance == '123'"), XDRQueryError); + REQUIRE_THROWS_AS(runQuery("entry_size() == '123'"), XDRQueryError); } SECTION("int out of range") @@ -516,7 +530,7 @@ TEST_CASE("XDR field extractor", "[xdrquery]") "data.account.thresholds, " "data.offer.selling.assetCode,data.account.balance"); matcher.extractFields(entries[0]); - REQUIRE(matcher.getFieldNames() == + REQUIRE(matcher.getColumnNames() == std::vector{"data.account.thresholds", "data.offer.selling.assetCode", "data.account.balance"}); @@ -587,17 +601,18 @@ TEST_CASE("XDR accumulator", "[xdrquery]") SECTION("multiple aggregations") { - testAggregation( - "avg(data.account.balance), sum(data.account.balance),count()", - {AccumulatorResultType(1305. 
/ 4.), - AccumulatorResultType(uint64_t(1305)), - AccumulatorResultType(uint64_t(4))}); + testAggregation("avg(data.account.balance), sum(data.account.balance), " + "sum(entry_size()), count()", + {AccumulatorResultType(1305. / 4.), + AccumulatorResultType(uint64_t(1305)), + AccumulatorResultType(uint64_t(192 * 4)), + AccumulatorResultType(uint64_t(4))}); } SECTION("field names") { - XDRAccumulator accumulator( - "count(),avg(data.account.balance),sum(data.account.balance)"); + XDRAccumulator accumulator("count(),sum(entry_size()),avg(data.account." + "balance),sum(data.account.balance)"); accumulator.addEntry(entries[0]); std::vector accNames; for (auto const& acc : accumulator.getAccumulators()) @@ -605,7 +620,8 @@ TEST_CASE("XDR accumulator", "[xdrquery]") accNames.push_back(acc->getName()); } REQUIRE(accNames == - std::vector{"count", "avg(data.account.balance)", + std::vector{"count", "sum(entry_size)", + "avg(data.account.balance)", "sum(data.account.balance)"}); }