From 4148c38dba2d16c9d133e76acf0438ee15d32d44 Mon Sep 17 00:00:00 2001 From: piotrkosecki Date: Mon, 9 Aug 2021 18:09:27 +0200 Subject: [PATCH 1/2] Adding possibility of using "distinct" in the aggregation --- .../data/tezos/TezosDataOperationsTest.scala | 59 +++++++++++++++---- .../common/generic/chain/DataTypes.scala | 4 +- .../conseil/common/util/DatabaseUtil.scala | 26 ++++---- .../tezos/bigmaps/BigMapsOperations.scala | 6 +- 4 files changed, 70 insertions(+), 25 deletions(-) diff --git a/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala b/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala index 08281c676..bf3a22a98 100644 --- a/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala +++ b/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala @@ -6,18 +6,11 @@ import com.softwaremill.diffx.scalatest.DiffMatcher import org.scalatest.concurrent.IntegrationPatience import slick.jdbc.PostgresProfile.api._ import tech.cryptonomic.conseil.api.TezosInMemoryDatabaseSetup -import tech.cryptonomic.conseil.common.generic.chain.DataTypes.{Query, _} +import tech.cryptonomic.conseil.common.generic.chain.DataTypes.{Query, SimpleField, _} import tech.cryptonomic.conseil.common.testkit.{ConseilSpec, InMemoryDatabase} -import tech.cryptonomic.conseil.common.tezos.Tables.{ - AccountsHistoryRow, - AccountsRow, - BlocksRow, - FeesRow, - OperationGroupsRow, - OperationsRow -} +import tech.cryptonomic.conseil.common.tezos.Tables.{AccountsHistoryRow, AccountsRow, BlocksRow, FeesRow, OperationGroupsRow, OperationsRow} import tech.cryptonomic.conseil.common.tezos.{Fork, Tables} -import tech.cryptonomic.conseil.common.tezos.TezosTypes.{makeAccountId, TezosBlockHash} +import tech.cryptonomic.conseil.common.tezos.TezosTypes.{TezosBlockHash, makeAccountId} import scala.concurrent.duration._ @@ -2111,7 +2104,7 @@ class TezosDataOperationsTest Aggregation( field = "medium", function = AggregationType.count, - Some( + predicate = Some( AggregationPredicate( operation = OperationType.gt, set = List(1), @@ -2169,7 +2162,7 @@ class TezosDataOperationsTest Aggregation( field = "medium", function = AggregationType.count, - Some( + predicate = Some( AggregationPredicate( operation = OperationType.gt, set = List(0), @@ -2180,7 +2173,7 @@ class TezosDataOperationsTest Aggregation( field = "low", function = AggregationType.sum, - Some( + predicate = Some( AggregationPredicate( operation = OperationType.eq, set = List(0), @@ -2271,6 +2264,46 @@ class TezosDataOperationsTest } + "aggregate with distinct count aggregation" in { + val feesTmp = List( + FeesRow(1, 1, 1, Timestamp.valueOf("2000-01-01 00:00:00"), "kind", forkId = Fork.mainForkId), + FeesRow(2, 1, 1, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId), + FeesRow(3, 2, 1, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId), + FeesRow(4, 2, 1, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId), + FeesRow(5, 3, 2, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId), + FeesRow(6, 3, 2, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId) + ) + + val aggregate = List( + Aggregation("low", AggregationType.count), + Aggregation("medium", AggregationType.count, Some(true)) + ) + + val populateAndTest = for { + _ <- Tables.Fees ++= feesTmp + found <- sut.selectWithPredicates( + "tezos", + table = Tables.Fees.baseTableRow.tableName, + columns = List(SimpleField("low"), SimpleField("medium"), SimpleField("high")), + predicates = List.empty, + ordering = List(QueryOrdering("count_medium", OrderDirection.desc)), + aggregation = aggregate, + temporalPartition = None, + snapshot = None, + outputType = OutputType.json, + limit = 10 + ) + } yield found + + val result = dbHandler.run(populateAndTest.transactionally).futureValue + + result shouldBe List( + Map("count_low" -> Some(4), "count_medium" -> Some(2), "high" -> Some(1)), + Map("count_low" -> Some(2), "count_medium" -> Some(1), "high" -> Some(2)) + ) + } + + "map date with datePart aggregation when it is only type of aggregation" in { val feesTmp = List( FeesRow(0, 1, 4, Timestamp.valueOf("2000-01-01 00:00:00"), "kind", forkId = Fork.mainForkId), diff --git a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala index 06d3abff5..e70346dc4 100644 --- a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala +++ b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala @@ -163,18 +163,20 @@ object DataTypes { case class ApiAggregation( field: String, function: AggregationType = AggregationType.sum, + distinct: Option[Boolean] = None, predicate: Option[ApiAggregationPredicate] = None ) { /** Transforms Aggregation received form API into Aggregation */ def toAggregation: Aggregation = - Aggregation(field, function, predicate.map(_.toAggregationPredicate)) + Aggregation(field, function, distinct, predicate.map(_.toAggregationPredicate)) } /** Class representing aggregation */ case class Aggregation( field: String, function: AggregationType = AggregationType.sum, + distinct: Option[Boolean] = None, predicate: Option[AggregationPredicate] = None ) { diff --git a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala index b76f03002..edf4cb297 100644 --- a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala +++ b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala @@ -184,7 +184,7 @@ object DatabaseUtil { limit: Int ): SQLActionBuilder = { val aggregationFields = aggregations.map { aggregation => - mapAggregationToSQL(aggregation.function, aggregation.field) + " as " + mapAggregationToAlias( + mapAggregationToSQL(aggregation, aggregation.field) + " as " + mapAggregationToAlias( aggregation.function, aggregation.field ) @@ -259,7 +259,7 @@ object DatabaseUtil { */ def makeQuery(table: String, columns: List[Field], aggregations: List[Aggregation]): SQLActionBuilder = { val aggregationFields = aggregations.map { aggregation => - mapAggregationToSQL(aggregation.function, aggregation.field) + " as " + mapAggregationToAlias( + mapAggregationToSQL(aggregation, aggregation.field) + " as " + mapAggregationToAlias( aggregation.function, aggregation.field ) @@ -311,7 +311,7 @@ object DatabaseUtil { aggregation.flatMap { aggregation => aggregation.getPredicate.toList.map { predicate => concatenateSqlActions( - sql""" AND #${mapAggregationToSQL(aggregation.function, aggregation.field)} """, + sql""" AND #${mapAggregationToSQL(aggregation, aggregation.field)} """, mapOperationToSQL(predicate.operation, predicate.inverse, predicate.set.map(_.toString)) ) } @@ -322,14 +322,20 @@ object DatabaseUtil { s"to_char($field, '$format')" /** maps aggregation operation to the SQL function*/ - private def mapAggregationToSQL(aggregationType: AggregationType, column: String): String = - aggregationType match { - case AggregationType.sum => s"SUM($column)" - case AggregationType.count => s"COUNT($column)" - case AggregationType.max => s"MAX($column)" - case AggregationType.min => s"MIN($column)" - case AggregationType.avg => s"AVG($column)" + private def mapAggregationToSQL(aggregation: Aggregation, column: String): String = { + val col = if(aggregation.distinct.getOrElse(false)) { + s"DISTINCT $column" + } else { + column } + aggregation.function match { + case AggregationType.sum => s"SUM($col)" + case AggregationType.count => s"COUNT($col)" + case AggregationType.max => s"MAX($col)" + case AggregationType.min => s"MIN($col)" + case AggregationType.avg => s"AVG($col)" + } + } /** maps aggregation operation to the SQL alias */ private def mapAggregationToAlias(aggregationType: AggregationType, column: String): String = diff --git a/conseil-lorre/src/main/scala/tech/cryptonomic/conseil/indexer/tezos/bigmaps/BigMapsOperations.scala b/conseil-lorre/src/main/scala/tech/cryptonomic/conseil/indexer/tezos/bigmaps/BigMapsOperations.scala index ad75ab83c..55cfb02aa 100644 --- a/conseil-lorre/src/main/scala/tech/cryptonomic/conseil/indexer/tezos/bigmaps/BigMapsOperations.scala +++ b/conseil-lorre/src/main/scala/tech/cryptonomic/conseil/indexer/tezos/bigmaps/BigMapsOperations.scala @@ -27,6 +27,7 @@ import tech.cryptonomic.conseil.common.tezos.TezosTypes.BlockTagged.fromBlockDat */ case class BigMapsOperations[Profile <: ExPostgresProfile](profile: Profile) extends ConseilLogSupport { import profile.api._ + import io.scalaland.chimney.dsl._ /** Create an action to find and copy big maps based on the diff contained in the blocks * @@ -95,7 +96,10 @@ case class BigMapsOperations[Profile <: ExPostgresProfile](profile: Profile) ext DBIO.sequence { List( Tables.BigMapContents.insertOrUpdateAll(rowsToWrite), - Tables.BigMapContentsHistory ++= updateData.map(Tables.BigMapContentsHistoryRow.tupled).distinct + Tables.BigMapContentsHistory ++= updateData + .map(BigMapContentsRow.tupled) + .map(_.transformInto[Tables.BigMapContentsHistoryRow]) + .distinct ) } } From 2b50a388dab00e00d0aa6b9e232e85b3524006a3 Mon Sep 17 00:00:00 2001 From: piotrkosecki Date: Thu, 12 Aug 2021 18:19:46 +0200 Subject: [PATCH 2/2] updates to aggregation after consultations with Mike --- .../data/tezos/TezosDataOperationsTest.scala | 8 +++--- .../common/generic/chain/DataTypes.scala | 8 ++---- .../conseil/common/util/DatabaseUtil.scala | 28 ++++++++----------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala b/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala index bf3a22a98..3e286ec92 100644 --- a/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala +++ b/conseil-api/src/test/scala/tech/cryptonomic/conseil/api/routes/platform/data/tezos/TezosDataOperationsTest.scala @@ -2276,7 +2276,7 @@ class TezosDataOperationsTest val aggregate = List( Aggregation("low", AggregationType.count), - Aggregation("medium", AggregationType.count, Some(true)) + Aggregation("medium", AggregationType.countDistinct) ) val populateAndTest = for { @@ -2286,7 +2286,7 @@ class TezosDataOperationsTest table = Tables.Fees.baseTableRow.tableName, columns = List(SimpleField("low"), SimpleField("medium"), SimpleField("high")), predicates = List.empty, - ordering = List(QueryOrdering("count_medium", OrderDirection.desc)), + ordering = List(QueryOrdering("count_distinct_medium", OrderDirection.desc)), aggregation = aggregate, temporalPartition = None, snapshot = None, @@ -2298,8 +2298,8 @@ class TezosDataOperationsTest val result = dbHandler.run(populateAndTest.transactionally).futureValue result shouldBe List( - Map("count_low" -> Some(4), "count_medium" -> Some(2), "high" -> Some(1)), - Map("count_low" -> Some(2), "count_medium" -> Some(1), "high" -> Some(2)) + Map("count_low" -> Some(4), "count_distinct_medium" -> Some(2), "high" -> Some(1)), + Map("count_low" -> Some(2), "count_distinct_medium" -> Some(1), "high" -> Some(2)) ) } diff --git a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala index e70346dc4..56ff3dbd1 100644 --- a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala +++ b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/generic/chain/DataTypes.scala @@ -24,7 +24,7 @@ object DataTypes { /** Method checks if type can be aggregated */ lazy val canBeAggregated: DataType => AggregationType => Boolean = { dataType => { - case AggregationType.count => true + case AggregationType.count | AggregationType.countDistinct => true case AggregationType.max | AggregationType.min => Set(DataType.Decimal, DataType.Int, DataType.LargeInt, DataType.DateTime, DataType.Currency)(dataType) case AggregationType.avg | AggregationType.sum => @@ -163,20 +163,18 @@ object DataTypes { case class ApiAggregation( field: String, function: AggregationType = AggregationType.sum, - distinct: Option[Boolean] = None, predicate: Option[ApiAggregationPredicate] = None ) { /** Transforms Aggregation received form API into Aggregation */ def toAggregation: Aggregation = - Aggregation(field, function, distinct, predicate.map(_.toAggregationPredicate)) + Aggregation(field, function, predicate.map(_.toAggregationPredicate)) } /** Class representing aggregation */ case class Aggregation( field: String, function: AggregationType = AggregationType.sum, - distinct: Option[Boolean] = None, predicate: Option[AggregationPredicate] = None ) { @@ -251,7 +249,7 @@ object DataTypes { /** Helper method for extracting prefixes needed for SQL */ def prefixes: List[String] = values.toList.map(_.toString + "_") type AggregationType = Value - val sum, count, max, min, avg = Value + val sum, count, max, min, avg, countDistinct = Value } /** Enumeration of aggregation functions */ diff --git a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala index edf4cb297..f4480ab03 100644 --- a/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala +++ b/conseil-common/src/main/scala/tech/cryptonomic/conseil/common/util/DatabaseUtil.scala @@ -184,7 +184,7 @@ object DatabaseUtil { limit: Int ): SQLActionBuilder = { val aggregationFields = aggregations.map { aggregation => - mapAggregationToSQL(aggregation, aggregation.field) + " as " + mapAggregationToAlias( + mapAggregationToSQL(aggregation.function, aggregation.field) + " as " + mapAggregationToAlias( aggregation.function, aggregation.field ) @@ -259,7 +259,7 @@ object DatabaseUtil { */ def makeQuery(table: String, columns: List[Field], aggregations: List[Aggregation]): SQLActionBuilder = { val aggregationFields = aggregations.map { aggregation => - mapAggregationToSQL(aggregation, aggregation.field) + " as " + mapAggregationToAlias( + mapAggregationToSQL(aggregation.function, aggregation.field) + " as " + mapAggregationToAlias( aggregation.function, aggregation.field ) @@ -311,7 +311,7 @@ object DatabaseUtil { aggregation.flatMap { aggregation => aggregation.getPredicate.toList.map { predicate => concatenateSqlActions( - sql""" AND #${mapAggregationToSQL(aggregation, aggregation.field)} """, + sql""" AND #${mapAggregationToSQL(aggregation.function, aggregation.field)} """, mapOperationToSQL(predicate.operation, predicate.inverse, predicate.set.map(_.toString)) ) } @@ -322,26 +322,22 @@ object DatabaseUtil { s"to_char($field, '$format')" /** maps aggregation operation to the SQL function*/ - private def mapAggregationToSQL(aggregation: Aggregation, column: String): String = { - val col = if(aggregation.distinct.getOrElse(false)) { - s"DISTINCT $column" - } else { - column - } - aggregation.function match { - case AggregationType.sum => s"SUM($col)" - case AggregationType.count => s"COUNT($col)" - case AggregationType.max => s"MAX($col)" - case AggregationType.min => s"MIN($col)" - case AggregationType.avg => s"AVG($col)" + private def mapAggregationToSQL(aggregationType: AggregationType, column: String): String = + aggregationType match { + case AggregationType.sum => s"SUM($column)" + case AggregationType.count => s"COUNT($column)" + case AggregationType.countDistinct => s"COUNT(DISTINCT $column)" + case AggregationType.max => s"MAX($column)" + case AggregationType.min => s"MIN($column)" + case AggregationType.avg => s"AVG($column)" } - } /** maps aggregation operation to the SQL alias */ private def mapAggregationToAlias(aggregationType: AggregationType, column: String): String = aggregationType match { case AggregationType.sum => s"sum_$column" case AggregationType.count => s"count_$column" + case AggregationType.countDistinct => s"count_distinct_$column" case AggregationType.max => s"max_$column" case AggregationType.min => s"min_$column" case AggregationType.avg => s"avg_$column"