From a4dbc9a37b89d5f0fbece8802c862bf98c3ca85a Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 4 Apr 2022 10:12:48 +0100 Subject: [PATCH] Moving Aggregation to Java (#3364) --- CHANGELOG.md | 3 + build.sbt | 2 +- .../Table/0.0.0-dev/THIRD-PARTY/NOTICE | 5 + .../com.ibm.icu.icu4j-67.1/LICENSE | 414 ++++++++++++++++++ .../com.ibm.icu.icu4j-67.1/NOTICES | 405 +++++++++++++++++ .../0.0.0-dev/src/Data/Aggregate_Column.enso | 90 ++-- .../Table/0.0.0-dev/src/Data/Table.enso | 65 ++- .../Standard/Table/0.0.0-dev/src/Error.enso | 29 +- .../Internal/Aggregate_Column_Aggregator.enso | 152 ------- .../src/Internal/Aggregate_Column_Helper.enso | 74 +++- .../enso/table/aggregations/Aggregator.java | 98 +++++ .../enso/table/aggregations/Concatenate.java | 73 +++ .../org/enso/table/aggregations/Count.java | 24 + .../table/aggregations/CountDistinct.java | 48 ++ .../enso/table/aggregations/CountEmpty.java | 44 ++ .../enso/table/aggregations/CountNothing.java | 37 ++ .../org/enso/table/aggregations/First.java | 31 ++ .../org/enso/table/aggregations/GroupBy.java | 23 + .../org/enso/table/aggregations/Last.java | 28 ++ .../org/enso/table/aggregations/Mean.java | 52 +++ .../org/enso/table/aggregations/MinOrMax.java | 73 +++ .../org/enso/table/aggregations/Mode.java | 59 +++ .../enso/table/aggregations/Percentile.java | 78 ++++ .../table/aggregations/ShortestOrLongest.java | 58 +++ .../table/aggregations/StandardDeviation.java | 59 +++ .../java/org/enso/table/aggregations/Sum.java | 48 ++ .../column/builder/object/NumericBuilder.java | 4 + .../org/enso/table/data/index/HashIndex.java | 3 +- .../table/data/index/MultiValueIndex.java | 95 ++++ .../enso/table/data/index/MultiValueKey.java | 74 ++++ .../java/org/enso/table/data/table/Table.java | 35 +- .../table/problems/AggregatedProblems.java | 68 +++ .../problems/ColumnAggregatedProblems.java | 29 ++ .../table/problems/FloatingPointGrouping.java | 23 + .../table/problems/InvalidAggregation.java | 25 ++ 
.../table/data/table/problems/Problem.java | 8 + .../table/problems/UnquotedDelimiter.java | 25 ++ test/Benchmarks/src/Table/Aggregate.enso | 28 +- .../src/Aggregate_Column_Spec.enso | 8 +- test/Table_Tests/src/Aggregate_Spec.enso | 106 ++++- .../src/Database/Postgresql_Spec.enso | 2 +- .../src/Database/Redshift_Spec.enso | 2 +- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- .../com.ibm.icu.icu4j-67.1/copyright-ignore | 4 + .../com.ibm.icu.icu4j-67.1/copyright-keep | 203 +++++++++ .../com.ibm.icu.icu4j-67.1/custom-license | 1 + .../Table/com.ibm.icu.icu4j-67.1/files-keep | 1 + tools/legal-review/Table/report-state | 4 +- 48 files changed, 2563 insertions(+), 259 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/LICENSE create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/NOTICES delete mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Aggregator.enso create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Count.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/First.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Last.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java 
create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java create mode 100644 std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKey.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/ColumnAggregatedProblems.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/FloatingPointGrouping.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/InvalidAggregation.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/Problem.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/problems/UnquotedDelimiter.java create mode 100644 tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-ignore create mode 100644 tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-keep create mode 100644 tools/legal-review/Table/com.ibm.icu.icu4j-67.1/custom-license create mode 100644 tools/legal-review/Table/com.ibm.icu.icu4j-67.1/files-keep diff --git a/CHANGELOG.md b/CHANGELOG.md index a9bc500421c8..4c8d5d350b1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,6 +90,8 @@ - [Implemented `Panic.catch` and helper functions for handling errors. 
Added a type parameter to `Panic.recover` to recover specific types of errors.][3344] - [Added warning handling to `Table.aggregate`][3349] +- [Improved performance of `Table.aggregate` and full warnings implementation] + [3364] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -137,6 +139,7 @@ [3346]: https://github.com/enso-org/enso/pull/3346 [3349]: https://github.com/enso-org/enso/pull/3349 [3361]: https://github.com/enso-org/enso/pull/3361 +[3364]: https://github.com/enso-org/enso/pull/3364 #### Enso Compiler diff --git a/build.sbt b/build.sbt index 89211f100879..b4d2cf0d7027 100644 --- a/build.sbt +++ b/build.sbt @@ -1582,8 +1582,8 @@ lazy val `std-table` = project Compile / packageBin / artifactPath := `table-polyglot-root` / "std-table.jar", libraryDependencies ++= Seq( + "com.ibm.icu" % "icu4j" % icuVersion, "com.univocity" % "univocity-parsers" % "2.9.0", - "org.graalvm.sdk" % "graal-sdk" % graalVersion % "provided", "org.apache.poi" % "poi-ooxml" % "5.0.0", "org.apache.xmlbeans" % "xmlbeans" % "5.0.1" ), diff --git a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE index 9d9ada0c8ffb..f9990f91be68 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE +++ b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE @@ -11,6 +11,11 @@ The license file can be found at `licenses/BSD-3-Clause`. Copyright notices related to this dependency can be found in the directory `com.github.virtuald.curvesapi-1.06`. +'icu4j', licensed under the Unicode/ICU License, is distributed with the Table. +The license information can be found along with the copyright notices. +Copyright notices related to this dependency can be found in the directory `com.ibm.icu.icu4j-67.1`. + + 'univocity-parsers', licensed under the Apache 2, is distributed with the Table. The license file can be found at `licenses/APACHE2.0`. 
Copyright notices related to this dependency can be found in the directory `com.univocity.univocity-parsers-2.9.0`. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/LICENSE b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/LICENSE new file mode 100644 index 000000000000..e7f98ed18391 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/LICENSE @@ -0,0 +1,414 @@ +COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + +Copyright © 1991-2020 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +--------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. + +1. ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. + +2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + + # The Google Chrome software developed by Google is licensed under + # the BSD license. Other software included in this distribution is + # provided under other licenses, as set forth below. + # + # The BSD License + # http://opensource.org/licenses/bsd-license.php + # Copyright (C) 2006-2008, Google Inc. + # + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, + # this list of conditions and the following disclaimer. + # Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided with + # the distribution. + # Neither the name of Google Inc. nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. 
+ # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # + # + # The word list in cjdict.txt are generated by combining three word lists + # listed below with further processing for compound word breaking. The + # frequency is generated with an iterative training against Google web + # corpora. + # + # * Libtabe (Chinese) + # - https://sourceforge.net/project/?group_id=1519 + # - Its license terms and conditions are shown below. + # + # * IPADIC (Japanese) + # - http://chasen.aist-nara.ac.jp/chasen/distribution.html + # - Its license terms and conditions are shown below. + # + # ---------COPYING.libtabe ---- BEGIN-------------------- + # + # /* + # * Copyright (c) 1999 TaBE Project. + # * Copyright (c) 1999 Pai-Hsiang Hsiao. + # * All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . 
Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the TaBE Project nor the names of its + # * contributors may be used to endorse or promote products derived + # * from this software without specific prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # /* + # * Copyright (c) 1999 Computer Systems and Communication Lab, + # * Institute of Information Science, Academia + # * Sinica. All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . 
Neither the name of the Computer Systems and Communication Lab + # * nor the names of its contributors may be used to endorse or + # * promote products derived from this software without specific + # * prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, + # University of Illinois + # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 + # + # ---------------COPYING.libtabe-----END-------------------------------- + # + # + # ---------------COPYING.ipadic-----BEGIN------------------------------- + # + # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science + # and Technology. All Rights Reserved. + # + # Use, reproduction, and distribution of this software is permitted. + # Any copy of this software, whether in its original form or modified, + # must include both the above copyright notice and the following + # paragraphs. 
+ # + # Nara Institute of Science and Technology (NAIST), + # the copyright holders, disclaims all warranties with regard to this + # software, including all implied warranties of merchantability and + # fitness, in no event shall NAIST be liable for + # any special, indirect or consequential damages or any damages + # whatsoever resulting from loss of use, data or profits, whether in an + # action of contract, negligence or other tortuous action, arising out + # of or in connection with the use or performance of this software. + # + # A large portion of the dictionary entries + # originate from ICOT Free Software. The following conditions for ICOT + # Free Software applies to the current dictionary as well. + # + # Each User may also freely distribute the Program, whether in its + # original form or modified, to any third party or parties, PROVIDED + # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + # on, or be attached to, the Program, which is distributed substantially + # in the same form as set out herein and that such intended + # distribution, if actually made, will neither violate or otherwise + # contravene any of the laws and regulations of the countries having + # jurisdiction over the User or the intended distribution itself. + # + # NO WARRANTY + # + # The program was produced on an experimental basis in the course of the + # research and development conducted during the project and is provided + # to users as so produced on an experimental basis. Accordingly, the + # program is provided without any warranty whatsoever, whether express, + # implied, statutory or otherwise. The term "warranty" used herein + # includes, but is not limited to, any warranty of the quality, + # performance, merchantability and fitness for a particular purpose of + # the program and the nonexistence of any infringement or violation of + # any right of any third party. 
+ # + # Each user of the program will agree and understand, and be deemed to + # have agreed and understood, that there is no warranty whatsoever for + # the program and, accordingly, the entire risk arising from or + # otherwise connected with the program is assumed by the user. + # + # Therefore, neither ICOT, the copyright holder, or any other + # organization that participated in or was otherwise related to the + # development of the program and their respective officials, directors, + # officers and other employees shall be held liable for any and all + # damages, including, without limitation, general, special, incidental + # and consequential damages, arising out of or otherwise in connection + # with the use or inability to use the program or any product, material + # or result produced or otherwise obtained by using the program, + # regardless of whether they have been advised of, or otherwise had + # knowledge of, the possibility of such damages at any time during the + # project or thereafter. Each user will be deemed to have agreed to the + # foregoing by his or her commencement of use of the program. The term + # "use" as used herein includes, but is not limited to, the use, + # modification, copying and distribution of the program and the + # production of secondary products from the program. + # + # In the case where the program, whether in its original form or + # modified, was distributed or delivered to or received by a user from + # any person, organization or entity other than ICOT, unless it makes or + # grants independently of ICOT any specific warranty to the user in + # writing, such person, organization or entity, will also be exempted + # from and not be held liable to the user for any such damages as noted + # above as far as the program is concerned. + # + # ---------------COPYING.ipadic-----END---------------------------------- + +3. 
Lao Word Break Dictionary Data (laodict.txt) + + # Copyright (c) 2013 International Business Machines Corporation + # and others. All Rights Reserved. + # + # Project: http://code.google.com/p/lao-dictionary/ + # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt + # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt + # (copied below) + # + # This file is derived from the above dictionary, with slight + # modifications. + # ---------------------------------------------------------------------- + # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, + # are permitted provided that the following conditions are met: + # + # + # Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. Redistributions in + # binary form must reproduce the above copyright notice, this list of + # conditions and the following disclaimer in the documentation and/or + # other materials provided with the distribution. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # -------------------------------------------------------------------------- + +4. Burmese Word Break Dictionary Data (burmesedict.txt) + + # Copyright (c) 2014 International Business Machines Corporation + # and others. All Rights Reserved. + # + # This list is part of a project hosted at: + # github.com/kanyawtech/myanmar-karen-word-lists + # + # -------------------------------------------------------------------------- + # Copyright (c) 2013, LeRoy Benjamin Sharon + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: Redistributions of source code must retain the above + # copyright notice, this list of conditions and the following + # disclaimer. Redistributions in binary form must reproduce the + # above copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # Neither the name Myanmar Karen Word Lists, nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + # -------------------------------------------------------------------------- + +5. Time Zone Database + + ICU uses the public domain data and code derived from Time Zone +Database for its time zone support. The ownership of the TZ database +is explained in BCP 175: Procedure for Maintaining the Time Zone +Database section 7. + + # 7. Database Ownership + # + # The TZ database itself is not an IETF Contribution or an IETF + # document. Rather it is a pre-existing and regularly updated work + # that is in the public domain, and is intended to remain in the + # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do + # not apply to the TZ Database or contributions that individuals make + # to it. Should any claims be made and substantiated against the TZ + # Database, the organization that is providing the IANA + # Considerations defined in this RFC, under the memorandum of + # understanding with the IETF, currently ICANN, may act in accordance + # with all competent court orders. No ownership claims will be made + # by ICANN or the IETF Trust on the database or the code. Any person + # making a contribution to the database or code waives all rights to + # future claims in that contribution or in the TZ Database. + +6. Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/NOTICES b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/NOTICES new file mode 100644 index 000000000000..2f3c80450a06 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/com.ibm.icu.icu4j-67.1/NOTICES @@ -0,0 +1,405 @@ +

Copyright © IBM Corporation 1999. All rights reserved. + +Copyright (C) 1996-2004, International Business Machines Corporation and * + +Copyright (C) 1996-2005, International Business Machines Corporation and * + +Copyright (C) 1996-2007, International Business Machines Corporation and * + +Copyright (C) 1996-2007, International Business Machines Corporation and * + +Copyright (C) 1996-2009, Google, International Business Machines Corporation and * + +Copyright (C) 1996-2009, International Business Machines Corporation and * + +Copyright (C) 1996-2010, International Business Machines Corporation and + +Copyright (C) 1996-2010, International Business Machines Corporation and * + +Copyright (C) 1996-2010, International Business Machines Corporation and * + +Copyright (C) 1996-2011, International Business Machines Corporation and + +Copyright (C) 1996-2011, International Business Machines Corporation and * + +Copyright (C) 1996-2011, International Business Machines Corporation and * + +Copyright (C) 1996-2012, International Business Machines Corporation and * + +Copyright (C) 1996-2013, International Business Machines Corporation and + +Copyright (C) 1996-2013, International Business Machines Corporation and * + +Copyright (C) 1996-2014, International Business Machines Corporation and + +Copyright (C) 1996-2014, International Business Machines Corporation and * + +Copyright (C) 1996-2015, Google, Inc., International Business Machines Corporation and + +Copyright (C) 1996-2015, International Business Machines + +Copyright (C) 1996-2015, International Business Machines Corporation and + +Copyright (C) 1996-2015, International Business Machines Corporation and * + +Copyright (C) 1996-2016, Google, International Business Machines Corporation and + +Copyright (C) 1996-2016, International Business Machines + +Copyright (C) 1996-2016, International Business Machines Corporation and + +Copyright (C) 1996-2016, International Business Machines Corporation and * + +Copyright (C) 
1996-2016, International Business Machines Corporation and * + +Copyright (C) 1999-2014, International Business Machines + +Copyright (C) 1999-2015, International Business Machines + +Copyright (C) 2000-2009, International Business Machines Corporation and * + +Copyright (C) 2000-2014, International Business Machines + +Copyright (C) 2000-2014, International Business Machines Corporation and + +Copyright (C) 2000-2016, International Business Machines Corporation and + +Copyright (C) 2001-2004, International Business Machines Corporation and * + +Copyright (C) 2001-2008, International Business Machines + +Copyright (C) 2001-2009, International Business Machines Corporation and * + +Copyright (C) 2001-2010, International Business Machines + +Copyright (C) 2001-2010, International Business Machines Corporation and * + +Copyright (C) 2001-2011, International Business Machines Corporation and * + +Copyright (C) 2001-2012, International Business Machines + +Copyright (C) 2001-2013, International Business Machines Corporation and * + +Copyright (C) 2001-2014, International Business Machines + +Copyright (C) 2001-2015, International Business Machines Corporation and + +Copyright (C) 2001-2016 International Business Machines Corporation and + +Copyright (C) 2001-2016, International Business Machines + +Copyright (C) 2001-2016, International Business Machines Corporation and + +Copyright (C) 2001-2016, International Business Machines Corporation and * + +Copyright (C) 2002-2009 International Business Machines Corporation * + +Copyright (C) 2002-2010, International Business Machines + +Copyright (C) 2002-2010, International Business Machines Corporation and * + +Copyright (C) 2002-2014, International Business Machines Corporation and others. 
+ +Copyright (C) 2002-2016, International Business Machines Corporation and + +Copyright (C) 2003-2010, International Business Machines + +Copyright (C) 2003-2011, International Business Machines Corporation and * + +Copyright (C) 2003-2011, International Business Machines Corporation and * + +Copyright (C) 2003-2014, International Business Machines Corporation and + +Copyright (C) 2003-2014, International Business Machines Corporation and * + +Copyright (C) 2003-2015, International Business Machines Corporation and + +Copyright (C) 2003-2016, Google, International Business Machines Corporation + +Copyright (C) 2003-2016, International Business Machines Corporation and + +Copyright (C) 2003-2016, International Business Machines Corporation and * + +Copyright (C) 2003-2016, International Business Machines Corporation and others. All Rights Reserved. + +Copyright (C) 2004-2006, International Business Machines Corporation and * + +Copyright (C) 2004-2009, International Business Machines Corporation and * + +Copyright (C) 2004-2009, International Business Machines Corporation and * + +Copyright (C) 2004-2010, International Business Machines + +Copyright (C) 2004-2010, International Business Machines Corporation and * + +Copyright (C) 2004-2014, International Business Machines Corporation and + +Copyright (C) 2004-2015, International Business Machines + +Copyright (C) 2004-2016, Google Inc, International Business Machines + +Copyright (C) 2004-2016, International Business Machines Corporation and + +Copyright (C) 2004-2016, International Business Machines Corporation and * + +Copyright (C) 2005 - 2012, International Business Machines Corporation and * + +Copyright (C) 2005 - 2014, International Business Machines Corporation and * + +Copyright (C) 2005-2006, International Business Machines + +Copyright (C) 2005-2010, International Business Machines + +Copyright (C) 2005-2011, International Business Machines Corporation and * + +Copyright (C) 2005-2012, International 
Business Machines Corporation and * + +Copyright (C) 2005-2012, International Business Machines Corporation and * + +Copyright (C) 2005-2013, International Business Machines Corporation and * + +Copyright (C) 2005-2015, International Business Machines Corporation and + +Copyright (C) 2005-2016 International Business Machines Corporation and + +Copyright (C) 2005-2016, International Business Machines Corporation and + +Copyright (C) 2005-2016, International Business Machines Corporation and * + +Copyright (C) 2006-2009, Google, International Business Machines Corporation * + +Copyright (C) 2006-2015, International Business Machines Corporation and + +Copyright (C) 2006-2016, Google, International Business Machines Corporation + +Copyright (C) 2007, International Business Machines Corporation and * + +Copyright (C) 2007-2008, International Business Machines Corporation and * + +Copyright (C) 2007-2009, International Business Machines Corporation and * + +Copyright (C) 2007-2010, International Business Machines Corporation and * + +Copyright (C) 2007-2010, International Business Machines Corporation and * + +Copyright (C) 2007-2011, International Business Machines Corporation and * + +Copyright (C) 2007-2011, International Business Machines Corporation and others. 
+ +Copyright (C) 2007-2012, International Business Machines Corporation and * + +Copyright (C) 2007-2013, International Business Machines Corporation and * + +Copyright (C) 2007-2014, International Business Machines Corporation and * + +Copyright (C) 2007-2014, International Business Machines Corporation and * + +Copyright (C) 2007-2015, Google Inc, International Business Machines Corporation + +Copyright (C) 2007-2015, International Business Machines Corporation and + +Copyright (C) 2007-2015, International Business Machines Corporation and * + +Copyright (C) 2007-2016, International Business Machines + +Copyright (C) 2007-2016, International Business Machines Corporation and + +Copyright (C) 2007-2016, International Business Machines Corporation and * + +Copyright (C) 2008-2009, Google, International Business Machines + +Copyright (C) 2008-2009, International Business Machines + +Copyright (C) 2008-2014, Google, International Business Machines + +Copyright (C) 2008-2014, International Business Machines Corporation and * + +Copyright (C) 2008-2015, Google, International Business Machines Corporation and + +Copyright (C) 2008-2015, International Business Machines Corporation and + +Copyright (C) 2008-2016 International Business Machines Corporation + +Copyright (C) 2008-2016, Google Inc, International Business Machines Corporation + +Copyright (C) 2008-2016, International Business Machines + +Copyright (C) 2008-2016, International Business Machines Corporation and + +Copyright (C) 2009 , Yahoo! Inc. 
* + +Copyright (C) 2009, Google, International Business Machines Corporation and * + +Copyright (C) 2009, International Business Machines Corporation and * + +Copyright (C) 2009, International Business Machines Corporation and * + +Copyright (C) 2009,2016 International Business Machines Corporation and + +Copyright (C) 2009-2010, Google, International Business Machines Corporation * + +Copyright (C) 2009-2010, International Business Machines Corporation and * + +Copyright (C) 2009-2011, Google, International Business Machines Corporation + +Copyright (C) 2009-2011, International Business Machines Corporation and * + +Copyright (C) 2009-2012, International Business Machines Corporation and * + +Copyright (C) 2009-2013, International Business Machines Corporation and * + +Copyright (C) 2009-2014, International Business Machines + +Copyright (C) 2009-2014, International Business Machines Corporation and + +Copyright (C) 2009-2014, International Business Machines Corporation and * + +Copyright (C) 2009-2015, Google, International Business Machines Corporation + +Copyright (C) 2009-2015, International Business Machines + +Copyright (C) 2009-2015, International Business Machines Corporation and + +Copyright (C) 2009-2015, International Business Machines Corporation and * + +Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation + +Copyright (C) 2009-2016, International Business Machines + +Copyright (C) 2009-2016, International Business Machines Corporation and + +Copyright (C) 2009-2016, International Business Machines Corporation and * + +Copyright (C) 2009-2016, International Business Machines Corporation, + +Copyright (C) 2010, International Business Machines + +Copyright (C) 2010, International Business Machines Corporation and * + +Copyright (C) 2010-2011, Google, International Business Machines * + +Copyright (C) 2010-2013, International Business Machines Corporation and * + +Copyright (C) 2010-2014, Google, International Business 
Machines Corporation * + +Copyright (C) 2010-2014, International Business Machines + +Copyright (C) 2010-2015, International Business Machines + +Copyright (C) 2010-2016, Google, Inc.; International Business Machines * + +Copyright (C) 2010-2016, International Business Machines + +Copyright (C) 2011, International Business Machines + +Copyright (C) 2011, International Business Machines Corporation and * + +Copyright (C) 2011-2014, International Business Machines + +Copyright (C) 2011-2016, International Business Machines Corporation + +Copyright (C) 2011-2016, International Business Machines Corporation and + +Copyright (C) 2011-2016, International Business Machines Corporation and * + +Copyright (C) 2012, International Business Machines Corporation and * + +Copyright (C) 2012-2014, International Business Machines + +Copyright (C) 2012-2014, International Business Machines Corporation and * + +Copyright (C) 2012-2015, International Business Machines + +Copyright (C) 2012-2015, International Business Machines Corporation and * + +Copyright (C) 2012-2016, Google, International Business Machines Corporation and + +Copyright (C) 2012-2016, International Business Machines + +Copyright (C) 2012-2016, International Business Machines Corporation and + +Copyright (C) 2012-2016, International Business Machines Corporation and * + +Copyright (C) 2013, Google Inc, International Business Machines Corporation and * + +Copyright (C) 2013, International Business Machines Corporation and * + +Copyright (C) 2013-2014, International Business Machines + +Copyright (C) 2013-2014, International Business Machines Corporation and * + +Copyright (C) 2013-2015, International Business Machines + +Copyright (C) 2013-2015, International Business Machines Corporation and + +Copyright (C) 2013-2016, International Business Machines Corporation and + +Copyright (C) 2014, International Business Machines Corporation and + +Copyright (C) 2014, International Business Machines Corporation and * + 
+Copyright (C) 2014-2015, International Business Machines Corporation and + +Copyright (C) 2014-2016, International Business Machines Corporation and + +Copyright (C) 2015, International Business Machines Corporation and + +Copyright (C) 2015-2016, International Business Machines Corporation and + +Copyright (C) 2016, International Business Machines Corporation and + +Copyright (C) 2016, International Business Machines Corporation and * + +Copyright (c) 2001-2011, International Business Machines + +Copyright (c) 2001-2016, International Business Machines + +Copyright (c) 2001-2016, International Business Machines Corporation and + +Copyright (c) 2002, International Business Machines Corporation + +Copyright (c) 2002-2007, International Business Machines Corporation + +Copyright (c) 2002-2010, International Business Machines + +Copyright (c) 2002-2010, International Business Machines Corporation + +Copyright (c) 2002-2011, International Business Machines Corporation + +Copyright (c) 2002-2014, Google, International Business Machines + +Copyright (c) 2002-2014, International Business Machines Corporation + +Copyright (c) 2002-2015, International Business Machines + +Copyright (c) 2002-2016, International Business Machines + +Copyright (c) 2003-2010, International Business Machines + +Copyright (c) 2003-2011, International Business Machines + +Copyright (c) 2003-2016 International Business Machines + +Copyright (c) 2003-2016, International Business Machines + +Copyright (c) 2004-2010, International Business Machines + +Copyright (c) 2004-2013, International Business Machines + +Copyright (c) 2004-2014, International Business Machines + +Copyright (c) 2004-2015, International Business Machines + +Copyright (c) 2004-2016, International Business Machines + +Copyright (c) 2007-2009 International Business Machines Corporation and * + +Copyright (c) 2007-2015 International Business Machines Corporation and * + +Copyright (c) 2013-2014, International Business Machines + 
+Copyright IBM Corporation, 1996-2016. All Rights Reserved. */ + +Copyright IBM Corporation, 1997, 2000, 2005, 2007. All Rights Reserved. */ + +copyright=" Copyright (c) IBM Corporation 1996, 2000. All rights reserved. "; diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso index af80ee626d0c..2b2e4d4c459b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso @@ -7,17 +7,19 @@ type Aggregate_Column ## Group By type Group_By (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) - ## Creates a new column with the row count of each group + ## Creates a new column with the row count of each group. If no rows, + evaluates to 0. Arguments: - name: name of new column. type Count (new_name:Text|Nothing=Nothing) ## Creates a new column with the count of unique items in the selected - column(s) within each group. + column(s) within each group. If no rows, evaluates to 0. Arguments: - - columns: either a single or set of columns (specified by name, index or Column object) to count across. + - columns: either a single or set of columns (specified by name, index or + Column object) to count across. - name: name of new column. - ignore_nothing: if all values are Nothing won't be included. type Count_Distinct (columns:Column|Text|Integer|Column_Selector) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False) @@ -25,7 +27,7 @@ type Aggregate_Column ## ALIAS Count_Not_Null Creates a new column with the count of not `Nothing` (null) values of the - specified column within each group. + specified column within each group. If no rows, evaluates to 0. Arguments: - columns: column (specified by name, index or Column object) to count. 
@@ -35,7 +37,7 @@ type Aggregate_Column ## ALIAS Count_Null, Count_Missing Creates a new column with the count of `Nothing` (null) values of the - specified column within each group. + specified column within each group. If no rows, evaluates to 0. Arguments: - column: column (specified by name, index or Column object) to count. @@ -43,7 +45,7 @@ type Aggregate_Column type Count_Nothing (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the count of not `Nothing` (null) and non-empty - ("") values of the column within each group. + ("") values of the column within each group. If no rows, evaluates to 0. Arguments: - column: column (specified by name, index or Column object) to count. @@ -51,7 +53,7 @@ type Aggregate_Column type Count_Not_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the count of `Nothing` (null) or empty ("") - text values of the column within each group. + text values of the column within each group. If no rows, evaluates to 0. Arguments: - column: column (specified by name, index or Column object) to count. @@ -59,7 +61,7 @@ type Aggregate_Column type Count_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the sum of values (ignoring missing values) of - the specified column within each group. + the column within each group. If no rows, evaluates to `Nothing`. Arguments: - column: column (specified by name, index or Column object) to total. @@ -67,7 +69,7 @@ type Aggregate_Column type Sum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the mean of values (ignoring missing values) of - the specified column within each group. + the column within each group. If no rows, evaluates to `Nothing`. Arguments: - column: column (specified by name, index or Column object) to average. 
@@ -75,43 +77,50 @@ type Aggregate_Column type Average (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the median of values (ignoring missing values) - of the specified column within each group. + of the column within each group. If no rows, evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to calculate median on. + - column: column (specified by name, index or Column object) to calculate + median on. - name: name of new column. type Median (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the median of values (ignoring missing values) - of the specified column within each group. + of the column within each group. If no rows, evaluates to `Nothing`. Arguments: - percentile: Percentage to compute from 0-1 inclusive. - - column: column (specified by name, index or Column object) to compute percentile. + - column: column (specified by name, index or Column object) to compute + percentile. - name: name of new column. type Percentile (percentile:Decimal) (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the mode of values (ignoring missing values) - of the specified column within each group. + of the column within each group. If no rows, evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find the most common value. + - column: column (specified by name, index or Column object) to find the + most common value. - name: name of new column. type Mode (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) ## Creates a new column with the standard deviation of values (ignoring - missing values) of the column within each group. + missing values) of the column within each group. If no rows, evaluates to + `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to compute standard deviation. 
+ - column: column (specified by name, index or Column object) to compute + standard deviation. - name: name of new column. - population argument specifies if group is a sample or the population type Standard_Deviation (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (population:Boolean=False) - ## Creates a new column with the values concatenated together. `Nothing` values will become an empty string. + ## Creates a new column with the values concatenated together. `Nothing` + values will become an empty string. If no rows, evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to concatenate values. + - column: column (specified by name, index or Column object) to + concatenate values. - name: name of new column. - separator: added between each value. - prefix: added at the start of the result. @@ -120,10 +129,12 @@ type Aggregate_Column or contains the separtor. type Concatenate (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="") - ## Creates a new column with the first value in each group. + ## Creates a new column with the first value in each group. If no rows, + evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find first group entry. + - column: column (specified by name, index or Column object) to find + first group entry. - name: name of new column. - ignore_nothing: if `True`, then missing values are ignored and first not missing value returned. @@ -131,10 +142,12 @@ type Aggregate_Column results within the group. type First (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing) - ## Creates a new column with the last value in each group. + ## Creates a new column with the last value in each group. If no rows, + evaluates to `Nothing`. 
Arguments: - - column: column (specified by name, index or Column object) to find last group entry. + - column: column (specified by name, index or Column object) to find + last group entry. - name: name of new column. - ignore_nothing: if `True`, then missing values are ignored and last not missing value returned. @@ -142,37 +155,38 @@ type Aggregate_Column results within the group. type Last (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing) - ## Creates a new column with the maximum value in each group. + ## Creates a new column with the maximum value in each group. If no rows, + evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find maximum. + - column: column (specified by name, index or Column object) to find + maximum. - name: name of new column. type Maximum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) - ## Creates a new column with the maximum value in each group. + ## Creates a new column with the minimum value in each group. If no rows, + evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find minimum. + - column: column (specified by name, index or Column object) to find + minimum. - name: name of new column. type Minimum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) - ## Creates a new column with the shortest text in each group. + ## Creates a new column with the shortest text in each group. If no rows, + evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find shortest value. + - column: column (specified by name, index or Column object) to find + shortest value. - name: name of new column. type Shortest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) - ## Creates a new column with the longest text in each group. + ## Creates a new column with the longest text in each group. 
If no rows, + evaluates to `Nothing`. Arguments: - - column: column (specified by name, index or Column object) to find longest value. + - column: column (specified by name, index or Column object) to find + longest value. - name: name of new column. type Longest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) - -## Occurs when cannot aggregate a column -type Invalid_Aggregation_Method (column : Text) (message : Text) - -Invalid_Aggregation_Method.to_display_text : Text -Invalid_Aggregation_Method.to_display_text = - "The aggregate column "+this.column+" resulted in an error: "+this.message diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index fa6a763b87f4..d478ee4dbc8b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -9,7 +9,6 @@ import Standard.Table.Io.Spreadsheet_Write_Mode import Standard.Table.Io.Format import Standard.Table.Internal.Table_Helpers import Standard.Table.Internal.Aggregate_Column_Helper -import Standard.Table.Internal.Aggregate_Column_Aggregator from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index @@ -487,49 +486,47 @@ type Table this.take_end (this.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems - ## Prototype Group By function - aggregate : [Aggregate_Column] -> Problem_Behavior -> Table - aggregate columns (on_problems=Report_Warning) = - validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this + ## ALIAS group, summarize - make_key = if (validated.key_columns.length == 0) then _->(Group_By_Key.key [1]) else - i->(Group_By_Key.key (validated.key_columns.map v->(v.at i))) + Aggregates the rows in a table using any `Group_By` entries in columns. 
+ The columns argument specifies which additional aggregations to perform and to return. - new_table = validated.valid_columns.map c->[c.first, Vector.new_builder] - aggregators = validated.valid_columns.map c->(Aggregate_Column_Aggregator.new c.second) - add_row _ = - idx = new_table.at 0 . at 1 . length - 0.up_to (aggregators.length) . each i-> - new_table.at i . at 1 . append ((aggregators.at i).initial) - idx + Arguments: + - columns: Vector of `Aggregate_Column` specifying the aggregated table. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. - # Fold - aggregate map i = - key = make_key i - row_index = map.get_or_else key (add_row Nothing) + The following problems can occur: + - If a column name is not in the input table, a `Missing_Input_Columns`. + - If a column index is out of range, a `Column_Indexes_Out_Of_Range`. + - If there are no valid columns in the output table, a `No_Output_Columns`. + - If there are invalid column names in the output table, an `Invalid_Output_Column_Names`. + - If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`. + - If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`. + - If an aggregation fails, an `Invalid_Aggregation`. + - If when concatenating values there is an unquoted delimiter, an `Unquoted_Delimiter`. + - If there are more than 10 issues with a single column, an `Additional_Warnings`. - # Accumulate - 0.up_to (validated.valid_columns.length) . each j-> - accumulator = aggregators.at j . accumulator - array = new_table.at j . at 1 . to_array - current = array . at row_index - new = accumulator current i - array . 
set_at row_index new + > Example + Group by the Key column, count the rows - map.insert key row_index + table.aggregate [Group_By "Key", Count Nothing] + aggregate : [Aggregate_Column] -> Problem_Behavior -> Table + aggregate columns (on_problems=Report_Warning) = + validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this on_problems.attach_problems_before validated.problems <| - # Build Table - if ((validated.key_columns.length == 0) && (this.row_count == 0)) then (add_row Nothing) else - 0.up_to this.row_count . fold Map.empty aggregate + java_key_columns = validated.key_columns.map .java_column + index = this.java_table.indexFromColumns java_key_columns.to_array - # Now Finalise and make a table - finalise builder index = - aggregator = aggregators.at index - Vector.new builder.length i->(aggregator.finalizer (builder.to_array.at i)) + new_columns = validated.valid_columns.map c->(Aggregate_Column_Helper.java_aggregator c.first c.second) - here.new (new_table.map_with_index i->c->[c.first, finalise c.second i]) + java_table = index.makeTable new_columns.to_array + new_table = Table java_table + on_problems.attach_problems_after new_table <| + problems = java_table.getProblems + Aggregate_Column_Helper.parse_aggregated_problems problems ## ALIAS Filter Rows ALIAS Mask Columns diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso index 076a9b141c5b..7a49ede59fb8 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso @@ -48,7 +48,6 @@ No_Output_Columns.to_display_text : Text No_Output_Columns.to_display_text = "The result contains no columns." - ## Indicates that the provided Column_Selector has duplicate entries. 
type Duplicate_Column_Selectors (duplicate_selectors : [(Text | Integer)]) @@ -67,3 +66,31 @@ type Input_Indices_Already_Matched (indices : [Integer]) Input_Indices_Already_Matched.to_display_text : Text Input_Indices_Already_Matched.to_display_text = "The indices "+this.indices.short_display_text+" matched columns which have been matched earlier by other indices, so they did not introduce any new columns into the result." + +## Indicates that an aggregation calculation could not be completed. +type Invalid_Aggregation (column:Text) (rows:[Integer]) (message:Text) + +Invalid_Aggregation.to_display_text : Text +Invalid_Aggregation.to_display_text = + "The "+this.column+" could not be calculated at "+this.rows.to_text+" : "+this.message + +## Indicates that a floating point number was used in a grouping. +type Floating_Point_Grouping (column:Text) (rows:[Integer]) + +Floating_Point_Grouping.to_display_text : Text +Floating_Point_Grouping.to_display_text = + "Grouping on floating points is not recommended within "+this.column+" at row "+this.rows.to_text+"." + +## Indicates that a text value with a delimiter was included in a concatenation without any quote character. +type Unquoted_Delimiter (column:Text) (rows:[Integer]) + +Unquoted_Delimiter.to_display_text : Text +Unquoted_Delimiter.to_display_text = + "The "+this.column+" at row "+this.rows.to_text+" contains the delimiter and there is no specified quote character." + +## Warning when additional warnings occurred. +type Additional_Warnings (count:Integer) + +Additional_Warnings.to_display_text : Text +Additional_Warnings.to_display_text = + "There were "+this.count.to_text+" additional issues."
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Aggregator.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Aggregator.enso deleted file mode 100644 index bfc0b6b46bd8..000000000000 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Aggregator.enso +++ /dev/null @@ -1,152 +0,0 @@ -from Standard.Base import all - -from Standard.Table.Data.Column as Column_Module import Column -import Standard.Table.Data.Group_By_Key -from Standard.Table.Data.Aggregate_Column import all - -## Type used to compute an aggregate column -type Aggregate_Column_Aggregator - type Aggregate_Column_Aggregator initial accumulator finalizer - -## PRIVATE - Given a map of values and counts, find the value at a specified percentile -calculate_percentile p:Decimal value:Map = - count = value.fold 0 (+) - if count == 0 then Nothing else - mid_value = (count - 1)*p + 1 - if mid_value <= 1 then value.first.first else - if mid_value >= count then value.last.first else - mid = mid_value.floor - output = value.fold_with_key [0, Nothing, Nothing] c->k->v-> - new_v = c.first + v - new_s = if c.first.up_to new_v . contains (mid-1) then k else c.second - new_t = if c.first.up_to new_v . contains mid then k else (c.at 2) - [new_v, new_s, new_t] - (output.second + (output.at 2 - output.second) * (mid_value - mid)) - -## PRIVATE - Creates an aggregator from a resolved `Aggregate_Column`. - - You may need to transform the column with `resolve_columns` first to make - sure that it is resolved. -new : Aggregate_Column->Aggregate_Column_Aggregator -new column = - ## This can be removed completely, but it is being removed by other PR so I'm not touching it too much. 
- create_closure c function:(Column->Any->Integer->Any) = function c - - is_empty s = if s.is_nothing then True else case s of - Text -> s.is_empty - _ -> Error.throw (Invalid_Aggregation_Method this.col "Empty is only valid for Text") - - create_aggregator initial:Any=Nothing (accumulator:(Any->Integer->Any)=c->_->c) (finalizer:(Any->Any)=(v->v)) = - Aggregate_Column_Aggregator initial accumulator finalizer - - case column of - Group_By c _ -> - create_aggregator accumulator=(create_closure c col->_->i->(col.at i)) - Count _ -> - create_aggregator initial=0 accumulator=(c->_->c+1) - Count_Distinct columns _ ignore_nothing -> - key_maker i = Group_By_Key.key (columns.map c->(c.at i)) - accumulator = case ignore_nothing of - False-> map->i->(map.insert (key_maker i) 1) - True-> map->i-> - key = key_maker i - if key.values.all .is_nothing then map else (map.insert key 1) - create_aggregator initial=Map.empty accumulator=accumulator finalizer=(v->v.size) - Count_Not_Nothing c _ -> - accumulator = create_closure c col->count->i->(count + if (col.at i).is_nothing then 0 else 1) - create_aggregator initial=0 accumulator=accumulator - Count_Nothing c _ -> - accumulator = create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0) - create_aggregator initial=0 accumulator=accumulator - Count_Not_Empty c _ -> - accumulator = create_closure c col->count->i->(count + if is_empty (col.at i) then 0 else 1) - create_aggregator initial=0 accumulator=accumulator - Count_Empty c _ -> - accumulator = create_closure c col->count->i->(count + if is_empty (col.at i) then 1 else 0) - create_aggregator initial=0 accumulator=accumulator - Sum c _ -> - accumulator = create_closure c col->total->i-> - v = col.at i - if v.is_nothing then total else - if total.is_nothing then v else total + v - create_aggregator accumulator=accumulator - Average c _ -> - accumulator = create_closure c col->a->i-> - v = col.at i - if v.is_nothing then a else [a.first + 1, a.second + v] - 
finalizer value = if value.first == 0 then Nothing else (value.second / value.first) - create_aggregator initial=[0, 0] accumulator=accumulator finalizer=finalizer - Median c _ -> - accumulator = create_closure c col->map->i-> - v = col.at i - if v.is_nothing then map else (map.insert v (1 + (map.get_or_else v 0))) - finalizer = here.calculate_percentile 0.5 - create_aggregator initial=Map.empty accumulator=accumulator finalizer=finalizer - Percentile p c _ -> - accumulator = create_closure c col->map->i-> - v = col.at i - if v.is_nothing then map else (map.insert v (1 + (map.get_or_else v 0))) - finalizer = here.calculate_percentile p - create_aggregator initial=Map.empty accumulator=accumulator finalizer=finalizer - Mode c _ -> - accumulator = create_closure c col->map->i-> - v = col.at i - if v.is_nothing then map else (map.insert v (1 + (map.get_or_else v 0))) - finalizer value = (value.fold_with_key (Pair 0 Nothing) p->k->v->(if v>(p.first) then (Pair v k) else p) . second) - create_aggregator initial=Map.empty accumulator=accumulator finalizer=finalizer - Standard_Deviation c _ p -> - accumulator = create_closure c col->a->i-> - v = col.at i - if v.is_nothing then a else [a.first + 1, a.second + v, (a.at 2) + v*v] - finalizer value = if value.first == 0 then Nothing else - f = if p then 1 else (value.first / (value.first - 1)).sqrt - ((value.at 2)/value.first - (value.second/value.first)^2).sqrt * f - create_aggregator initial=[0, 0, 0] accumulator=accumulator finalizer=finalizer - Concatenate c _ join prefix suffix q -> - accumulator = create_closure c col->text->i-> - v = col.at i - val=if v.is_nothing then "" else - val_text = case v of - Text -> v - _ -> v.to_text - if is_empty q then val_text else - if (val_text == "") || (val_text.contains join) then (q+(val_text.replace q (q+q))+q) else val_text - if text.is_nothing then val else (text + join + val) - finalizer value = if value.is_nothing then value else (prefix + value + suffix) - create_aggregator 
accumulator=accumulator finalizer=finalizer - First c _ ignore_nothing _ -> - accumulator = case ignore_nothing of - False -> create_closure c col->current->i->(if current.second then current else [(col.at i), True]) - True -> create_closure c col->current->i->if current.second then current else - v = col.at i - if v.is_nothing then current else [v, True] - create_aggregator initial=[Nothing, False] accumulator=accumulator finalizer=(p->p.first) - Last c _ ignore_nothing _ -> - accumulator = case ignore_nothing of - False -> create_closure c col->_->i->(col.at i) - True -> create_closure c col->current->i-> - v = (col.at i) - if v.is_nothing then current else v - create_aggregator accumulator=accumulator - Maximum c _ -> - accumulator = create_closure c col->m->i-> - v = col.at i - if v.is_nothing then m else if m.is_nothing then v else m.max v - create_aggregator accumulator=accumulator - Minimum c _ -> - accumulator = create_closure c col->m->i-> - v = col.at i - if v.is_nothing then m else if m.is_nothing then v else m.min v - create_aggregator accumulator=accumulator - Shortest c _ -> - accumulator = create_closure c col->m->i-> - v = col.at i - if v.is_nothing then m else if m.is_nothing then v else if m.length <= v.length then m else v - create_aggregator accumulator=accumulator - Longest c _ -> - accumulator = create_closure c col->m->i-> - v = col.at i - if v.is_nothing then m else if m.is_nothing then v else if m.length >= v.length then m else v - create_aggregator accumulator=accumulator diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index 68fcd4bf3468..6c674e353d68 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -8,7 +8,28 @@ import Standard.Table.Internal.Problem_Builder import 
Standard.Table.Internal.Unique_Name_Strategy import Standard.Table.Internal.Table_Helpers -from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names +from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings + +polyglot java import org.enso.table.aggregations.Aggregator +polyglot java import org.enso.table.aggregations.Concatenate as ConcatenateAggregator +polyglot java import org.enso.table.aggregations.Count as CountAggregator +polyglot java import org.enso.table.aggregations.CountDistinct as CountDistinctAggregator +polyglot java import org.enso.table.aggregations.CountEmpty as CountEmptyAggregator +polyglot java import org.enso.table.aggregations.CountNothing as CountNothingAggregator +polyglot java import org.enso.table.aggregations.First as FirstAggregator +polyglot java import org.enso.table.aggregations.GroupBy as GroupByAggregator +polyglot java import org.enso.table.aggregations.Last as LastAggregator +polyglot java import org.enso.table.aggregations.Mean as MeanAggregator +polyglot java import org.enso.table.aggregations.MinOrMax as MinOrMaxAggregator +polyglot java import org.enso.table.aggregations.Mode as ModeAggregator +polyglot java import org.enso.table.aggregations.Percentile as PercentileAggregator +polyglot java import org.enso.table.aggregations.ShortestOrLongest as ShortestOrLongestAggregator +polyglot java import org.enso.table.aggregations.StandardDeviation as StandardDeviationAggregator +polyglot java import org.enso.table.aggregations.Sum as SumAggregator + +polyglot java import org.enso.table.data.table.problems.InvalidAggregation +polyglot java import org.enso.table.data.table.problems.FloatingPointGrouping 
+polyglot java import org.enso.table.data.table.problems.UnquotedDelimiter ## Result type for aggregate_columns validation - key_columns: Vector of Columns from the table to group by @@ -142,3 +163,54 @@ resolve_aggregate table problem_builder aggregate_column = ## PRIVATE A marker for missing columns during resolution. type Internal_Missing_Column_Error + +## PRIVATE + Creates a Java Aggregator for the Aggregate_Column +java_aggregator : Aggregate_Column->Aggregator +java_aggregator name column = + case column of + Group_By c _ -> GroupByAggregator.new name c.java_column + Count _ -> CountAggregator.new name + Count_Distinct columns _ ignore_nothing -> + resolved = columns.map .java_column + CountDistinctAggregator.new name resolved.to_array ignore_nothing + Count_Not_Nothing c _ -> CountNothingAggregator.new name c.java_column False + Count_Nothing c _ -> CountNothingAggregator.new name c.java_column True + Count_Not_Empty c _ -> CountEmptyAggregator.new name c.java_column False + Count_Empty c _ -> CountEmptyAggregator.new name c.java_column True + Sum c _ -> SumAggregator.new name c.java_column + Average c _ -> MeanAggregator.new name c.java_column + Standard_Deviation c _ p -> StandardDeviationAggregator.new name c.java_column p + Median c _ -> PercentileAggregator.new name c.java_column 0.5 + Percentile p c _ -> PercentileAggregator.new name c.java_column p + Mode c _ -> ModeAggregator.new name c.java_column + First c _ ignore_nothing _ -> + ## TODO [JD] add support for Order By clause + https://www.pivotaltracker.com/story/show/181414668 + FirstAggregator.new name c.java_column ignore_nothing + Last c _ ignore_nothing _ -> + ## TODO [JD] add support for Order By clause + https://www.pivotaltracker.com/story/show/181414668 + LastAggregator.new name c.java_column ignore_nothing + Maximum c _ -> MinOrMaxAggregator.new name c.java_column 1 + Minimum c _ -> MinOrMaxAggregator.new name c.java_column -1 + Shortest c _ -> ShortestOrLongestAggregator.new name 
c.java_column -1 + Longest c _ -> ShortestOrLongestAggregator.new name c.java_column 1 + Concatenate c _ join prefix suffix quote -> ConcatenateAggregator.new name c.java_column join prefix suffix quote + _ -> Error.throw (Invalid_Aggregation name -1 "Unsupported aggregation") + +## PRIVATE + Convert Java aggregated problems to Enso Vector of equivalents +parse_aggregated_problems : Any->Vector +parse_aggregated_problems problems = + if problems.is_nothing then [] else + problems_array = problems.getProblems + parsed = Vector.new problems_array.length i-> + p = problems_array.at i + if Java.is_instance p InvalidAggregation then Invalid_Aggregation p.getColumnName (Vector.Vector p.getRows) p.getMessage else + if Java.is_instance p FloatingPointGrouping then Floating_Point_Grouping p.getColumnName (Vector.Vector p.getRows) else + if Java.is_instance p UnquotedDelimiter then Unquoted_Delimiter p.getColumnName (Vector.Vector p.getRows) else + Invalid_Aggregation Nothing -1 "Unknown Error" + + if problems.getCount == problems_array.length then parsed else + parsed + [Additional_Warnings (problems.getCount - problems_array.length)] diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java new file mode 100644 index 000000000000..0cf5d2d468b3 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java @@ -0,0 +1,98 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.table.problems.AggregatedProblems; +import org.enso.table.data.table.problems.Problem; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/*** + * Interface used to define aggregate columns. 
+ */ +public abstract class Aggregator { + private final String name; + private final int type; + private AggregatedProblems problems; + + protected Aggregator(String name, int type) { + this.name = name; + this.type = type; + this.problems = null; + } + + /*** + * @return Name of the new column. + */ + public final String getName() { + return name; + } + + /*** + * @return The type of the new column. + */ + public int getType() { + return type; + } + + public AggregatedProblems getProblems() { + return problems; + } + + /*** + * Compute the value for a set of rows + * @param indexes - indexes to the rows in the source table to aggregate on + * @return aggregated value + */ + public Object aggregate(int[] indexes) { + return this.aggregate(Arrays.stream(indexes).boxed().collect(Collectors.toList())); + } + + /*** + * Compute the value for a set of rows + * @param indexes - indexes to the rows in the source table to aggregate on + * @return aggregated value + */ + public abstract Object aggregate(List indexes); + + protected void addProblem(Problem problem) { + if (problems == null) { + problems = new AggregatedProblems(); + } + problems.add(problem); + } + + protected static Long CastToLong(Object value) { + if (value instanceof Long) { + return (Long)value; + } else if (value instanceof Integer) { + return ((Integer)value).longValue(); + } else if (value instanceof Byte) { + return ((Byte)value).longValue(); + } else if (value instanceof Float && ((Float)value) % 1 == 0) { + // Only return if an integer stored as a float ( % 1 == 0) + return ((Float)value).longValue(); + } else if (value instanceof Double && ((Double)value) % 1 == 0) { + // Only return if an integer stored as a double ( % 1 == 0) + return ((Double)value).longValue(); + } + + return null; + } + + protected static Double CastToDouble(Object value) { + if (value instanceof Long) { + return ((Long)value).doubleValue(); + } else if (value instanceof Integer) { + return ((Integer)value).doubleValue(); + 
} else if (value instanceof Byte) { + return ((Byte)value).doubleValue(); + } else if (value instanceof Float) { + return ((Float)value).doubleValue(); + } else if (value instanceof Double) { + return ((Double)value); + } + + return null; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java new file mode 100644 index 000000000000..25ec7b378cc3 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -0,0 +1,73 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; +import org.enso.table.data.table.problems.UnquotedDelimiter; + +import java.util.List; + +public class Concatenate extends Aggregator { + private final Storage storage; + private final String join; + private final String prefix; + private final String suffix; + private final String quote; + + public Concatenate(String name, Column column, String join, String prefix, String suffix, String quote) { + super(name, Storage.Type.STRING); + this.storage = column.getStorage(); + + this.join = join == null ? "" : join; + this.prefix = prefix; + this.suffix = suffix; + this.quote = quote == null ? 
"" : quote; + } + + @Override + public Object aggregate(List indexes) { + StringBuilder current = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value == null || value instanceof String) { + String textValue = toQuotedString(value, quote, join); + + if (quote.equals("") && textValue.contains(join)) { + this.addProblem(new UnquotedDelimiter(this.getName(), row, "Unquoted delimiter.")); + } + + if (current == null) { + current = new StringBuilder(); + current.append(textValue); + } else { + current.append(join); + current.append(textValue); + } + } else { + this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value.")); + return null; + } + } + + if (current == null) { + return null; + } + + if (prefix != null) { current.insert(0, prefix); } + current.append(suffix); + return current.toString(); + } + + private static String toQuotedString(Object value, final String quote, final String join) { + if (value == null) { + return ""; + } + + String textValue = value.toString(); + if (!quote.equals("") && (textValue.equals("") || textValue.contains(join))) { + return quote + textValue.replace(quote, quote + quote) + quote; + } + + return textValue; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Count.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Count.java new file mode 100644 index 000000000000..354c6e842b11 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Count.java @@ -0,0 +1,24 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; + +import java.util.List; + +/*** + * Aggregate Column counting the number of entries in a group. 
+ */ +public class Count extends Aggregator { + public Count(String name) { + super(name, Storage.Type.LONG); + } + + @Override + public Object aggregate(int[] indexes) { + return indexes.length; + } + + @Override + public Object aggregate(List indexes) { + return indexes.size(); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java new file mode 100644 index 000000000000..a4df350e1995 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java @@ -0,0 +1,48 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.index.MultiValueKey; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.FloatingPointGrouping; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/*** + * Aggregate Column counting the number of distinct items in a group. + * If `ignoreAllNull` is true, does count when all items are null. 
+ */ +public class CountDistinct extends Aggregator { + private final Storage[] storage; + private final boolean ignoreAllNull; + + /** + * Constructs a CountDistinct Aggregator + * @param name output column name + * @param columns input columns + * @param ignoreAllNull if true ignore then all values are null + */ + public CountDistinct(String name, Column[] columns, boolean ignoreAllNull) { + super(name, Storage.Type.LONG); + this.storage = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new); + this.ignoreAllNull = ignoreAllNull; + } + + @Override + public Object aggregate(List indexes) { + Set set = new HashSet<>(); + for (int row: indexes) { + MultiValueKey key = new MultiValueKey(Arrays.stream(storage).map(s->s.getItemBoxed(row)).toArray()); + if (key.hasFloatValues()) { + this.addProblem(new FloatingPointGrouping(this.getName(), row)); + } + + if (!ignoreAllNull || !key.areAllNull()) { + set.add(key); + } + } + return set.size(); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java new file mode 100644 index 000000000000..d33e92b8fe2c --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java @@ -0,0 +1,44 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column counting the number of (non-)empty entries in a group. + * If `isEmpty` is true, counts null or empty entries. + * If `isEmpty` is false, counts non-empty entries. 
+ */ +public class CountEmpty extends Aggregator { + private final Storage storage; + private final boolean isEmpty; + + /** + * Constructs a CountNothing Aggregator + * @param name output column name + * @param column input column + * @param isEmpty true to count nulls or empty, false to count non-empty + */ + public CountEmpty(String name, Column column, boolean isEmpty) { + super(name, Storage.Type.LONG); + this.storage = column.getStorage(); + this.isEmpty = isEmpty; + } + + @Override + public Object aggregate(List indexes) { + int count = 0; + for (int row : indexes) { + Object value = storage.getItemBoxed(row); + if (value != null && !(value instanceof String)) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value.")); + return null; + } + + count += ((value == null || ((String) value).length() == 0) == isEmpty ? 1 : 0); + } + return count; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java new file mode 100644 index 000000000000..1e7e540c90ee --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java @@ -0,0 +1,37 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; + +import java.util.List; + +/*** + * Aggregate Column counting the number of (not-)null entries in a group. + * If `isNothing` is true, counts null entries. + * If `isNothing` is false, counts non-null entries. 
+ */ +public class CountNothing extends Aggregator { + private final Storage storage; + private final boolean isNothing; + + /** + * Constructs a CountNothing Aggregator + * @param name output column name + * @param column input column + * @param isNothing true to count nulls, false to count non-nulls + */ + public CountNothing(String name, Column column, boolean isNothing) { + super(name, Storage.Type.LONG); + this.storage = column.getStorage(); + this.isNothing = isNothing; + } + + @Override + public Object aggregate(List indexes) { + long count = 0; + for (int row: indexes) { + count += ((storage.getItemBoxed(row) == null) == isNothing ? 1 : 0); + } + return count; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/First.java b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java new file mode 100644 index 000000000000..4aed71c4229b --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java @@ -0,0 +1,31 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; + +import java.util.List; + +/*** + * Aggregate Column finding the first value in a group. 
+ */ +public class First extends Aggregator { + private final Storage storage; + private final boolean ignoreNothing; + + public First(String name, Column column, boolean ignoreNothing) { + super(name, Storage.Type.OBJECT); + this.storage = column.getStorage(); + this.ignoreNothing = ignoreNothing; + } + + @Override + public Object aggregate(List indexes) { + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (!ignoreNothing || value != null) { + return value; + } + } + return null; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java new file mode 100644 index 000000000000..885f16d54452 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java @@ -0,0 +1,23 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; + +import java.util.List; + +/*** + * Aggregate Column getting the grouping key. + */ +public class GroupBy extends Aggregator { + private final Storage storage; + + public GroupBy(String name, Column column) { + super(name, Storage.Type.OBJECT); + storage = column.getStorage(); + } + + @Override + public Object aggregate(List indexes) { + return indexes.isEmpty() ? 
null : storage.getItemBoxed(indexes.get(0)); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java new file mode 100644 index 000000000000..e92a59e95abd --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java @@ -0,0 +1,28 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; + +import java.util.List; + +public class Last extends Aggregator { + private final Storage storage; + private final boolean ignoreNothing; + + public Last(String name, Column column, boolean ignoreNothing) { + super(name, Storage.Type.OBJECT); + this.storage = column.getStorage(); + this.ignoreNothing = ignoreNothing; + } + + @Override + public Object aggregate(List indexes) { + for (int i = indexes.size() - 1; i >= 0; i--) { + Object value = storage.getItemBoxed(indexes.get(i)); + if (!ignoreNothing || value != null) { + return value; + } + } + return null; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java new file mode 100644 index 000000000000..9547f9003033 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java @@ -0,0 +1,52 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column computing the mean value in a group. 
+ */ +public class Mean extends Aggregator { + private static class Calculation { + public long count; + public double total; + + public Calculation(double value) { + count = 1; + total = value; + } + } + + private final Storage storage; + + public Mean(String name, Column column) { + super(name, Storage.Type.DOUBLE); + this.storage = column.getStorage(); + } + + @Override + public Object aggregate(List indexes) { + Calculation current = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + Double dValue = CastToDouble(value); + if (dValue == null) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + return null; + } + + if (current == null) { + current = new Calculation(dValue); + } else { + current.count++; + current.total += dValue; + } + } + } + return current == null ? null : current.total / current.count; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java new file mode 100644 index 000000000000..6d1ccb633a67 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java @@ -0,0 +1,73 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column finding the minimum (minOrMax = -1) or maximum (minOrMax = 1) entry in a group. 
+ */ +public class MinOrMax extends Aggregator { + private final Storage storage; + private final int minOrMax; + + /** + * Constructs a MinOrMax Aggregator + * @param name output column name + * @param column input column + * @param minOrMax <0 for minimum, >0 for maximum + */ + public MinOrMax(String name, Column column, int minOrMax) { + super(name, Storage.Type.OBJECT); + this.storage = column.getStorage(); + this.minOrMax = Integer.signum(minOrMax); + } + + @Override + public Object aggregate(List indexes) { + Object current = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + try { + if (current == null || Integer.signum(Compare(current, value)) == minOrMax) { + current = value; + } + } catch (ClassCastException e) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot compare values.")); + return null; + } + } + } + return current; + } + + private static int Compare(Object current, Object value) { + if (current instanceof String && value instanceof String) { + return ((String)value).compareTo((String)current); + } + + if (current instanceof Long) { + Long lValue = CastToLong(value); + if (null != lValue) { + return Long.compare(lValue, (Long)current); + } + + Double dValue = CastToDouble(value); + if (null != dValue) { + return Double.compare(dValue, (Long)current); + } + } + + if (current instanceof Double) { + Double dValue = CastToDouble(value); + if (null != dValue) { + return Double.compare(dValue, (Double)current); + } + } + + throw new ClassCastException(); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java new file mode 100644 index 000000000000..c81a048612c3 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java @@ -0,0 +1,59 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import 
org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.FloatingPointGrouping; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/*** + * Aggregate Column computing the most common value in a group (ignoring Nothing). + */ +public class Mode extends Aggregator { + private final Storage storage; + + public Mode(String name, Column column) { + super(name, Storage.Type.OBJECT); + this.storage = column.getStorage(); + } + + @Override + public Object aggregate(List indexes) { + Object current = null; + int count = 0; + Map currentMap = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + // Merge all numbers onto a Long if possible or a Double if needed + Long lValue = CastToLong(value); + if (lValue == null) { + Double dValue = CastToDouble(value); + if (dValue != null) { + this.addProblem(new FloatingPointGrouping(this.getName(), row)); + value = dValue; + } + } else { + value = lValue; + } + + if (current == null) { + current = value; + count = 1; + currentMap = new HashMap<>(); + currentMap.put(value, 1); + } else { + int newCount = currentMap.getOrDefault(value, 0) + 1; + currentMap.put(value, newCount); + if (newCount > count) { + count = newCount; + current = value; + } + } + } + } + return current; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java new file mode 100644 index 000000000000..82ebf90de8d7 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java @@ -0,0 +1,78 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.*; + +/*** + * Aggregate Column computing a percentile value in a group. 
+ */ +public class Percentile extends Aggregator { + private final Storage storage; + private final double percentile; + + public Percentile(String name, Column column, double percentile) { + super(name, Storage.Type.DOUBLE); + this.storage = column.getStorage(); + this.percentile = percentile; + } + + @Override + public Object aggregate(List indexes) { + int count = 0; + SortedMap currentMap = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + Double dValue = CastToDouble(value); + + if (dValue == null) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + return null; + } else if (count == 0) { + count = 1; + currentMap = new TreeMap<>(); + currentMap.put(dValue, 1); + } else { + count++; + currentMap.put(dValue, currentMap.getOrDefault(dValue, 0) + 1); + } + } + } + + if (count == 0) { + return null; + } + + double mid_value = (count - 1) * percentile + 1; + if (mid_value <= 1) { + return currentMap.firstKey(); + } else if (mid_value >= count) { + return currentMap.lastKey(); + } + + double mid = Math.floor(mid_value); + + double first = 0; + int current = 0; + for (Map.Entry entry : currentMap.entrySet()) { + int nextCurrent = current + entry.getValue(); + + if (current <= mid - 1 && nextCurrent > mid - 1) { + first = entry.getKey(); + } + + if (current <= mid && nextCurrent > mid) { + double second = entry.getKey(); + return first + (second - first) * (mid_value - mid); + } + + current = nextCurrent; + } + + this.addProblem(new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); + return null; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java new file mode 100644 index 000000000000..57dcce09375b --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java @@ -0,0 +1,58 @@ 
+package org.enso.table.aggregations; + +import com.ibm.icu.text.BreakIterator; +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column finding the longest or shortest string in a group. + */ +public class ShortestOrLongest extends Aggregator { + private final Storage storage; + private final int minOrMax; + + public ShortestOrLongest(String name, Column column, int minOrMax) { + super(name, Storage.Type.STRING); + this.storage = column.getStorage(); + this.minOrMax = minOrMax; + } + + @Override + public Object aggregate(List indexes) { + long length = 0; + Object current = null; + + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + if (!(value instanceof String)) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value.")); + return null; + } + + long valueLength = GraphemeLength((String)value); + if (current == null || Long.compare(valueLength, length) == minOrMax) { + length = valueLength; + current = value; + } + } + } + + return current; + } + + private static long GraphemeLength(String text) { + BreakIterator iter = BreakIterator.getCharacterInstance(); + iter.setText(text); + + int count = 0; + for (int end = iter.next(); end != BreakIterator.DONE; end = iter.next()) { + count++; + } + + return count; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java new file mode 100644 index 000000000000..a738c392da6d --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java @@ -0,0 +1,59 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import 
org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column computing the standard deviation of a group. + */ +public class StandardDeviation extends Aggregator { + private static class Calculation { + public long count; + public double total; + public double total_sqr; + + public Calculation(double value) { + count = 1; + total = value; + total_sqr = value * value; + } + } + + private final Storage storage; + private final boolean population; + + public StandardDeviation(String name, Column column,boolean population) { + super(name, Storage.Type.DOUBLE); + this.storage = column.getStorage(); + this.population = population; + } + + @Override + public Object aggregate(List indexes) { + Calculation current = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + Double dValue = CastToDouble(value); + if (dValue == null) { + this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + return null; + } + + if (current == null) { + current = new Calculation(dValue); + } else { + current.count++; + current.total += dValue; + current.total_sqr += dValue*dValue; + } + } + } + return current == null ? null : + (population ? 
1 : Math.sqrt(current.count / (current.count - 1.0))) * + Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java new file mode 100644 index 000000000000..3f8bb49aca50 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java @@ -0,0 +1,48 @@ +package org.enso.table.aggregations; + +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.problems.InvalidAggregation; + +import java.util.List; + +/*** + * Aggregate Column computing the total value in a group. + */ +public class Sum extends Aggregator { + private final Storage storage; + + public Sum(String name, Column column) { + super(name, Storage.Type.DOUBLE); + this.storage = column.getStorage(); + } + + @Override + public Object aggregate(List indexes) { + Object current = null; + for (int row: indexes) { + Object value = storage.getItemBoxed(row); + if (value != null) { + if (current == null) { + current = 0L; + } + + Long lCurrent = CastToLong(current); + Long lValue = CastToLong(value); + if (lCurrent != null && lValue != null) { + current = lCurrent + lValue; + } else { + Double dCurrent = CastToDouble(current); + Double dValue = CastToDouble(value); + if (dCurrent != null && dValue != null) { + current = dCurrent + dValue; + } else { + this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + return null; + } + } + } + } + return current; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java index d9df1197b1d5..0f476634c1b1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java +++ 
b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java @@ -71,6 +71,10 @@ public void appendNoGrow(Object o) { data[currentSize++] = Double.doubleToRawLongBits((Double) o); } else if (!isDouble && o instanceof Long) { data[currentSize++] = (Long) o; + } else if (!isDouble && o instanceof Integer) { + data[currentSize++] = ((Integer) o).longValue(); + } else if (!isDouble && o instanceof Byte) { + data[currentSize++] = ((Byte) o).longValue(); } else if (isDouble && o instanceof BigDecimal) { data[currentSize++] = Double.doubleToRawLongBits(((BigDecimal) o).doubleValue()); } else { diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java index c9d110a30bda..52c6b2843f9c 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java @@ -1,6 +1,5 @@ package org.enso.table.data.index; -import org.enso.table.data.column.storage.LongStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.table.Column; @@ -11,7 +10,6 @@ public class HashIndex extends Index { private final Storage items; private final Map> locs; private final String name; - private Index uniqueIndex = null; private HashIndex(Storage items, Map> locs, String name) { this.items = items; @@ -34,6 +32,7 @@ public static HashIndex fromStorage(String name, Storage storage) { return new HashIndex(name, storage, storage.size()); } + @Override public Object iloc(int i) { return items.getItemBoxed(i); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java new file mode 100644 index 000000000000..f3a4775a15d2 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java @@ 
-0,0 +1,95 @@ +package org.enso.table.data.index; + +import org.enso.table.aggregations.Aggregator; +import org.enso.table.data.column.builder.object.*; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.table.Column; +import org.enso.table.data.table.Table; +import org.enso.table.data.table.problems.AggregatedProblems; +import org.enso.table.data.table.problems.FloatingPointGrouping; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class MultiValueIndex { + private final int keyColumnsLength; + private final Map> locs; + private final AggregatedProblems problems; + + public MultiValueIndex(Column[] keyColumns, int tableSize) { + this.keyColumnsLength = keyColumns.length; + this.locs = new HashMap<>(); + this.problems = new AggregatedProblems(); + + if (keyColumns.length != 0) { + int size = keyColumns[0].getSize(); + for (int i = 0; i < size; i++) { + int finalI = i; + MultiValueKey key = new MultiValueKey(Arrays.stream(keyColumns).map(c -> c.getStorage().getItemBoxed(finalI)).toArray()); + + if (key.hasFloatValues()) { + problems.add(new FloatingPointGrouping("GroupBy", i)); + } + + List ids = this.locs.computeIfAbsent(key, x -> new ArrayList<>()); + ids.add(i); + } + } else { + this.locs.put(new MultiValueKey(new Object[0]), IntStream.range(0, tableSize).boxed().collect(Collectors.toList())); + } + } + + public Table makeTable(Aggregator[] columns) { + final int length = columns.length; + final int size = locs.size(); + + boolean emptyScenario = size == 0 & keyColumnsLength == 0; + Builder[] storage = Arrays.stream(columns) + .map(c -> getBuilderForType(c.getType(), emptyScenario ? 
1 : size)) + .toArray(Builder[]::new); + + if (emptyScenario) { + // No grouping and no data + List empty = new ArrayList<>(); + for (int i = 0; i < length; i++) { + storage[i].appendNoGrow(columns[i].aggregate(empty)); + } + } else { + for (List group_locs : this.locs.values()) { + for (int i = 0; i < length; i++) { + Object value = columns[i].aggregate(group_locs); + storage[i].appendNoGrow(value); + } + } + } + + // Merge Problems + AggregatedProblems[] problems = new AggregatedProblems[1 + length]; + problems[0] = this.problems; + IntStream.range(0, length).forEach(i -> problems[i+1] = columns[i].getProblems()); + AggregatedProblems merged = AggregatedProblems.merge(problems); + + return new Table( + IntStream.range(0, length) + .mapToObj(i -> new Column(columns[i].getName(), storage[i].seal())) + .toArray(Column[]::new), + merged); + } + + private static Builder getBuilderForType(int type, int size) { + switch (type) { + case Storage.Type.BOOL: return new BoolBuilder(); + case Storage.Type.DOUBLE: return NumericBuilder.createDoubleBuilder(size); + case Storage.Type.LONG: return NumericBuilder.createLongBuilder(size); + case Storage.Type.STRING: return new StringBuilder(size); + case Storage.Type.OBJECT: return new ObjectBuilder(size); + } + return new InferredBuilder(size); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKey.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKey.java new file mode 100644 index 000000000000..dfef2ee249ea --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKey.java @@ -0,0 +1,74 @@ +package org.enso.table.data.index; + +import java.util.Arrays; + +public class MultiValueKey { + private final Object[] values; + private final int hashCodeValue; + private final boolean allNull; + private final boolean floatValue; + + public MultiValueKey(Object[] values) { + this.values = values; + + boolean allNull = true; + boolean floatValue = false; + + 
// Precompute HashCode - using Apache.Commons.Collections.Map.MultiKeyMap.hash algorithm + int h = 0; + for (Object value: this.values) { + if (value != null) { + Object folded = FoldObject(value); + floatValue = floatValue || (folded instanceof Double); + h ^= folded.hashCode(); + allNull = false; + } + } + h += ~(h << 9); + h ^= h >>> 14; + h += h << 4; + + this.hashCodeValue = h ^ (h >>> 10); + this.allNull = allNull; + this.floatValue = floatValue; + } + + @Override + public int hashCode() { + return this.hashCodeValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MultiValueKey that = (MultiValueKey) o; + return hashCodeValue == that.hashCodeValue && Arrays.equals(values, that.values); + } + + public boolean areAllNull() { + return allNull; + } + + public boolean hasFloatValues() { return floatValue; } + + protected static Object FoldObject(Object value) { + if (value instanceof Long) { + return value; + } else if (value instanceof Integer) { + return ((Integer)value).longValue(); + } else if (value instanceof Byte) { + return ((Byte)value).longValue(); + } else if (value instanceof Float && ((Float)value) % 1 == 0) { + return ((Float)value).longValue(); + } else if (value instanceof Double && ((Double)value) % 1 == 0) { + return ((Double)value).longValue(); + } else if (value instanceof Float) { + return ((Float)value).doubleValue(); + } else if (value instanceof Double) { + return value; + } + + return value; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 6cfcc411a23a..8a5b4001c064 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -13,8 +13,10 @@ import org.enso.table.data.index.DefaultIndex; import org.enso.table.data.index.HashIndex; import 
org.enso.table.data.index.Index; +import org.enso.table.data.index.MultiValueIndex; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.table.aggregate.AggregateTable; +import org.enso.table.data.table.problems.AggregatedProblems; import org.enso.table.error.NoSuchColumnException; import org.enso.table.error.UnexpectedColumnTypeException; @@ -23,6 +25,7 @@ public class Table { private final Column[] columns; private final Index index; + private final AggregatedProblems problems; /** * Creates a new table @@ -30,14 +33,21 @@ public class Table { * @param columns the columns contained in this table. */ public Table(Column[] columns) { - this( - columns, - new DefaultIndex((columns == null || columns.length == 0) ? 0 : columns[0].getSize())); + this(columns, null, null); } public Table(Column[] columns, Index index) { + this(columns, index, null); + } + + public Table(Column[] columns, AggregatedProblems problems) { + this(columns, null, problems); + } + + private Table(Column[] columns, Index index, AggregatedProblems problems) { this.columns = columns; - this.index = index; + this.index = index == null ? (new DefaultIndex((columns == null || columns.length == 0) ? 0 : columns[0].getSize())) : index; + this.problems = problems; } /** @return the number of rows in this table */ @@ -54,6 +64,13 @@ public Column[] getColumns() { return columns; } + /** + * @return Attached set of any problems from the Java side + */ + public AggregatedProblems getProblems() { + return problems; + } + /** * Returns a column with the given name, or null if it doesn't exist. * @@ -192,6 +209,16 @@ public Table indexFromColumn(String name) { return indexFromColumn(col); } + /** + * Creates an index for this table by using values from the specified columns. 
+ * + * @param columns set of columns to use as an Index + * @return a multi-value index over this table's rows, keyed by the given columns + */ + public MultiValueIndex indexFromColumns(Column[] columns) { + return new MultiValueIndex(columns, this.rowCount()); + } + /** * Selects a subset of columns of this table, by names. * diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java new file mode 100644 index 000000000000..902334a7afd0 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java @@ -0,0 +1,68 @@ +package org.enso.table.data.table.problems; + +import java.util.ArrayList; +import java.util.List; + +public class AggregatedProblems { + private final List problems; + private final int maxSize; + private int count; + + private AggregatedProblems(List problems, int count) { + this.problems = problems; + this.maxSize = problems.size(); + this.count = count; + } + + public AggregatedProblems() { + this(10); + } + + public AggregatedProblems(int maxSize) { + this.problems = new ArrayList<>(); + this.maxSize = maxSize; + this.count = 0; + } + + public Problem[] getProblems() { + if (count == 0) { + return new Problem[0]; + } + + return problems.toArray(Problem[]::new); + } + + public int getCount() { + return count; + } + + public void add(Problem problem) { + if (problem instanceof ColumnAggregatedProblems) { + for (Problem p : problems) { + if (p instanceof ColumnAggregatedProblems && + ((ColumnAggregatedProblems) p).merge((ColumnAggregatedProblems)problem)) { + return; + } + } + } + + if (problems.size() < maxSize) { + problems.add(problem); + } + count++; + } + + public static AggregatedProblems merge(AggregatedProblems[] problems) { + List merged = new ArrayList<>(); + int count = 0; + + for (AggregatedProblems p : problems) { + if (p != null && p.count > 0) { + merged.addAll(p.problems); + count += 
p.getCount(); + } + } + + return new AggregatedProblems(merged, count); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/ColumnAggregatedProblems.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/ColumnAggregatedProblems.java new file mode 100644 index 000000000000..33e48432177a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/ColumnAggregatedProblems.java @@ -0,0 +1,29 @@ +package org.enso.table.data.table.problems; + +import java.util.ArrayList; +import java.util.List; + +public abstract class ColumnAggregatedProblems implements Problem { + private final String columnName; + protected final List rows; + + protected ColumnAggregatedProblems(String columnName, int row) { + this.columnName = columnName; + this.rows = new ArrayList<>(); + this.rows.add(row); + } + + public String getColumnName() { + return columnName; + } + + public int[] getRows() { + return rows.stream().mapToInt(Integer::intValue).toArray(); + } + + public int count() { return rows.size(); } + + public abstract boolean merge(ColumnAggregatedProblems another); + + public abstract String getMessage(); +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/FloatingPointGrouping.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/FloatingPointGrouping.java new file mode 100644 index 000000000000..a93361d99daa --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/FloatingPointGrouping.java @@ -0,0 +1,23 @@ +package org.enso.table.data.table.problems; + +public class FloatingPointGrouping extends ColumnAggregatedProblems { + public FloatingPointGrouping(String columnName, int row) { + super(columnName, row); + } + + @Override + public String getMessage() { + return "Grouping on floating points is not recommended."; + } + + @Override + public boolean merge(ColumnAggregatedProblems another) { + if (another instanceof FloatingPointGrouping && + 
this.getColumnName().equals(another.getColumnName())) { + this.rows.addAll(another.rows); + return true; + } + + return false; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/InvalidAggregation.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/InvalidAggregation.java new file mode 100644 index 000000000000..41c08ee185b6 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/InvalidAggregation.java @@ -0,0 +1,25 @@ +package org.enso.table.data.table.problems; + +public class InvalidAggregation extends ColumnAggregatedProblems { + private final String message; + + public InvalidAggregation(String columnName, int row, String message) { + super(columnName, row); + this.message = message; + } + + @Override + public String getMessage() { return message; } + + @Override + public boolean merge(ColumnAggregatedProblems another) { + if (another instanceof InvalidAggregation && + this.getColumnName().equals(another.getColumnName()) && + this.message.equals(((InvalidAggregation) another).message)) { + this.rows.addAll(another.rows); + return true; + } + + return false; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/Problem.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/Problem.java new file mode 100644 index 000000000000..cf8f55115f72 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/Problem.java @@ -0,0 +1,8 @@ +package org.enso.table.data.table.problems; + +import java.util.List; + +public interface Problem { + String getMessage(); +} + diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/problems/UnquotedDelimiter.java b/std-bits/table/src/main/java/org/enso/table/data/table/problems/UnquotedDelimiter.java new file mode 100644 index 000000000000..405c46a8063a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/UnquotedDelimiter.java @@ -0,0 +1,25 @@ 
+package org.enso.table.data.table.problems; + +public class UnquotedDelimiter extends ColumnAggregatedProblems { + private final String message; + + public UnquotedDelimiter(String columnName, int row, String message) { + super(columnName, row); + this.message = message; + } + + @Override + public String getMessage() { return message; } + + @Override + public boolean merge(ColumnAggregatedProblems another) { + if (another instanceof UnquotedDelimiter && + this.getColumnName().equals(another.getColumnName()) && + this.message.equals(((UnquotedDelimiter) another).message)) { + this.rows.addAll(another.rows); + return true; + } + + return false; + } +} diff --git a/test/Benchmarks/src/Table/Aggregate.enso b/test/Benchmarks/src/Table/Aggregate.enso index 41322f870c74..35282407546a 100644 --- a/test/Benchmarks/src/Table/Aggregate.enso +++ b/test/Benchmarks/src/Table/Aggregate.enso @@ -10,8 +10,8 @@ from Standard.Table.Data.Aggregate_Column import all ## Bench Utilities ============================================================ vector_size = 2500 -iter_size = 100 -num_iterations = 10 +iter_size = 20 +num_iterations = 3 create_table : Integer->Integer->Table create_table rows (seed=1646322139) = @@ -31,19 +31,25 @@ main = table = here.create_table here.vector_size Bench.measure (table.aggregate [Count Nothing]) "Count table" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Maximum "ValueWithNothing"]) "Max table" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Sum "ValueWithNothing"]) "Sum table" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations Bench.measure (table.aggregate [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Median "Value"]) "Median 
table" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Mode "Index"]) "Mode table" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Median "Value"]) "Median table" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Mode "Index"]) "Mode table" here.iter_size here.num_iterations Bench.measure (table.aggregate [Group_By "Index", Count Nothing]) "Count grouped" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Group_By "Index", Maximum "ValueWithNothing"]) "Max table" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Group_By "Index", Sum "ValueWithNothing"]) "Sum table" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations Bench.measure (table.aggregate [Group_By "Index", Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Median "Value"]) "Median grouped" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Median "Value"]) "Median grouped" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Maximum "ValueWithNothing"]) "Max 
table" here.iter_size here.num_iterations + Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Sum "ValueWithNothing"]) "Sum table" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations - Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations + # Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations diff --git a/test/Table_Tests/src/Aggregate_Column_Spec.enso b/test/Table_Tests/src/Aggregate_Column_Spec.enso index be285338a72c..28d9ffa9c6a0 100644 --- a/test/Table_Tests/src/Aggregate_Column_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Column_Spec.enso @@ -3,7 +3,6 @@ from Standard.Base import all import Standard.Table.Data.Table from Standard.Table.Data.Aggregate_Column import all from Standard.Table.Data.Column_Selector import By_Name, By_Index -import Standard.Table.Internal.Aggregate_Column_Aggregator import Standard.Table.Internal.Aggregate_Column_Helper import Standard.Table.Internal.Problem_Builder @@ -28,9 +27,10 @@ spec = Test.group "Aggregate Columns" <| overridden -> overridden name . should_equal expected_name - acc = Aggregate_Column_Aggregator.new resolved - folded_value = 0.up_to table.row_count . 
fold acc.initial acc.accumulator - result = acc.finalizer folded_value + result = + acc = Aggregate_Column_Helper.java_aggregator "Name" resolved + indexes = Vector.new table.row_count v->v + acc.aggregate indexes.to_array if epsilon != False then ((result - expected_result).abs < epsilon).should_be_true else result.should_equal expected_result diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index a56dfd69596c..fa3501aa4202 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -3,14 +3,15 @@ from Standard.Base import all import Standard.Table from Standard.Table.Data.Column_Selector import By_Name, By_Index from Standard.Table.Data.Aggregate_Column import all -from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names +from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings import Standard.Test import Standard.Test.Problems +import Standard.Base.Error.Problem_Behavior -type Test_Selection problem_handling=True advanced_stats=True text=True first_last=True std_dev=True multi_distinct=True +type Test_Selection problem_handling=True advanced_stats=True text=True first_last=True std_dev=True multi_distinct=True aggregation_problems=True -all_tests = Test_Selection True True True True +all_tests = Test_Selection True True True True True spec = file_contents = (Enso_Project.data / "data.csv") . read @@ -34,6 +35,9 @@ spec = - pending: An optional mark to disable all test groups. Can be used to indicate that some tests are disabled due to missing test setup. 
aggregate_spec prefix table empty_table materialize test_selection=here.all_tests pending=Nothing = + expect_column_names names table = + table.columns . map .name . should_equal names frames_to_skip=2 + find_row key table (columns=Nothing) = table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x) 0.up_to table.row_count . find i-> @@ -676,9 +680,6 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test col2 = ["Value", [1, 2, 3]] Table.new [col1, col2] - expect_column_names names table = - table.columns . map .name . should_equal names frames_to_skip=2 - Test.specify "should raise a warning when there are no output columns" <| action = table.aggregate [] on_problems=_ problems = [No_Output_Columns] @@ -733,4 +734,97 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test tester = expect_column_names [] Problems.test_problem_handling action problems tester + aggregate_pending = case pending.is_nothing of + False -> pending + True -> if test_selection.aggregation_problems.not then "Not supported." 
+ Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=aggregate_pending <| + table = + col1 = ["Index", [1, 2, 3]] + col2 = ["Value", [1, 2, 3.1]] + col3 = ["Text", ["A", ",", "C"]] + col4 = ["Mixed", ["A", 1, "C"]] + Table.new [col1, col2, col3, col4] + + Test.specify "should warn if grouping on a floating point" <| + action = table.aggregate [Group_By 1] on_problems=_ + problems = [Floating_Point_Grouping "GroupBy" [2]] + tester = expect_column_names ["Value"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if totaling on a non number" <| + action = table.aggregate [Sum "Text"] on_problems=_ + problems = [Invalid_Aggregation "Sum Text" [0] "Cannot convert to a number."] + tester = expect_column_names ["Sum Text"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if averaging on a non number" <| + action = table.aggregate [Average "Text"] on_problems=_ + problems = [Invalid_Aggregation "Average Text" [0] "Cannot convert to a number."] + tester = expect_column_names ["Average Text"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if calculating standard deviation on a non number" <| + action = table.aggregate [Standard_Deviation "Text"] on_problems=_ + problems = [Invalid_Aggregation "Standard Deviation Text" [0] "Cannot convert to a number."] + tester = expect_column_names ["Standard Deviation Text"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if median on a non number" <| + action = table.aggregate [Median "Text"] on_problems=_ + problems = [Invalid_Aggregation "Median Text" [0] "Cannot convert to a number."] + tester = expect_column_names ["Median Text"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if trying shortest on a non text" <| + action = table.aggregate [Shortest "Index"] on_problems=_ + problems = 
[Invalid_Aggregation "Shortest Index" [0] "Not a text value."] + tester = expect_column_names ["Shortest Index"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if trying count empties on a non text" <| + action = table.aggregate [Count_Empty "Index"] on_problems=_ + problems = [Invalid_Aggregation "Count Empty Index" [0] "Not a text value."] + tester = expect_column_names ["Count Empty Index"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if trying concatenate on a non text" <| + action = table.aggregate [Concatenate "Index"] on_problems=_ + problems = [Invalid_Aggregation "Concatenate Index" [0] "Not a text value."] + tester = expect_column_names ["Concatenate Index"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if trying concatenate unquoted delimiters" <| + column = Concatenate "Text" separator="," + action = table.aggregate [column] on_problems=_ + problems = [Unquoted_Delimiter "Concatenate Text" [1]] + tester = expect_column_names ["Concatenate Text"] + Problems.test_problem_handling action problems tester + + Test.specify "should warn if can't compare value for Min or Max" <| + action = table.aggregate [Maximum "Mixed"] on_problems=_ + problems = [Invalid_Aggregation "Maximum Mixed" [1] "Cannot compare values."] + tester = expect_column_names ["Maximum Mixed"] + Problems.test_problem_handling action problems tester + + Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=aggregate_pending <| + table = + col1 = ["Key", ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"]] + col2 = ["Value", [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]] + col3 = ["Float", col2.second.map x->(1.5*x)] + Table.new [col1, col2, col3] + + Test.specify "should merge Invalid Aggregation warnings" <| + new_table = table.aggregate [Group_By "Key", Concatenate "Value"] + problems = Warning.get_all 
new_table . map .value + problems.length . should_equal 1 + problems.at 0 . is_an Invalid_Aggregation . should_be_true + problems.at 0 . rows . length . should_equal 15 + + Test.specify "should merge Floating Point Grouping warnings" <| + new_table = table.aggregate [Group_By "Float", Count Nothing] + problems = Warning.get_all new_table . map .value + problems.length . should_equal 1 + problems.at 0 . is_an Floating_Point_Grouping . should_be_true + problems.at 0 . rows . length . should_equal 9 + main = Test.Suite.run_main here.spec diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index b885bbcb41b3..ae97a5747950 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -52,7 +52,7 @@ run_tests connection pending=Nothing = here.postgres_specific_spec connection pending=pending Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending - selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False + selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False agg_in_memory_table = (Enso_Project.data / "data.csv") . 
read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table tables.append agg_table.name diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index 0e0197dff325..7bd47f8b3eb6 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -52,7 +52,7 @@ run_tests connection pending=Nothing = here.redshift_specific_spec connection pending=pending Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending - selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False + selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table tables.append agg_table.name diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index ebb824ad0ab8..2bdde182b70c 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -62,7 +62,7 @@ spec = here.sqlite_specific_spec connection Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False - selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False + selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False agg_in_memory_table = (Enso_Project.data / "data.csv") . 
read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) diff --git a/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-ignore b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-ignore new file mode 100644 index 000000000000..2111bf851e49 --- /dev/null +++ b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-ignore @@ -0,0 +1,4 @@ +License & terms of use: http://www.unicode.org/copyright.html#License +Does not write a copyright string. +2000.01.01 1.06 copyright update */ +2000.01.01 copyright update [Y2K has arrived] */ diff --git a/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-keep b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-keep new file mode 100644 index 000000000000..395f6802a743 --- /dev/null +++ b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/copyright-keep @@ -0,0 +1,203 @@ +

Copyright © IBM Corporation 1999. All rights reserved. +Copyright (C) 1996-2004, International Business Machines Corporation and * +Copyright (C) 1996-2005, International Business Machines Corporation and * +Copyright (C) 1996-2007, International Business Machines Corporation and * +Copyright (C) 1996-2007, International Business Machines Corporation and * +Copyright (C) 1996-2009, Google, International Business Machines Corporation and * +Copyright (C) 1996-2009, International Business Machines Corporation and * +Copyright (C) 1996-2010, International Business Machines Corporation and +Copyright (C) 1996-2010, International Business Machines Corporation and * +Copyright (C) 1996-2010, International Business Machines Corporation and * +Copyright (C) 1996-2011, International Business Machines Corporation and +Copyright (C) 1996-2011, International Business Machines Corporation and * +Copyright (C) 1996-2011, International Business Machines Corporation and * +Copyright (C) 1996-2012, International Business Machines Corporation and * +Copyright (C) 1996-2013, International Business Machines Corporation and +Copyright (C) 1996-2013, International Business Machines Corporation and * +Copyright (C) 1996-2014, International Business Machines Corporation and +Copyright (C) 1996-2014, International Business Machines Corporation and * +Copyright (C) 1996-2015, Google, Inc., International Business Machines Corporation and +Copyright (C) 1996-2015, International Business Machines +Copyright (C) 1996-2015, International Business Machines Corporation and +Copyright (C) 1996-2015, International Business Machines Corporation and * +Copyright (C) 1996-2016, Google, International Business Machines Corporation and +Copyright (C) 1996-2016, International Business Machines Corporation and +Copyright (C) 1996-2016, International Business Machines +Copyright (C) 1996-2016, International Business Machines Corporation and * +Copyright (C) 1996-2016, International Business Machines 
Corporation and * +Copyright (C) 1999-2014, International Business Machines +Copyright (C) 1999-2015, International Business Machines +Copyright (C) 2000-2009, International Business Machines Corporation and * +Copyright (C) 2000-2014, International Business Machines +Copyright (C) 2000-2014, International Business Machines Corporation and +Copyright (C) 2000-2016, International Business Machines Corporation and +Copyright (C) 2001-2004, International Business Machines Corporation and * +Copyright (C) 2001-2008, International Business Machines +Copyright (C) 2001-2009, International Business Machines Corporation and * +Copyright (C) 2001-2010, International Business Machines +Copyright (C) 2001-2010, International Business Machines Corporation and * +Copyright (C) 2001-2011, International Business Machines Corporation and * +Copyright (C) 2001-2012, International Business Machines +Copyright (C) 2001-2013, International Business Machines Corporation and * +Copyright (C) 2001-2014, International Business Machines +Copyright (C) 2001-2016 International Business Machines Corporation and +Copyright (C) 2001-2015, International Business Machines Corporation and +Copyright (C) 2001-2016, International Business Machines +Copyright (C) 2001-2016, International Business Machines Corporation and +Copyright (C) 2001-2016, International Business Machines Corporation and * +Copyright (C) 2002-2009 International Business Machines Corporation * +Copyright (C) 2002-2010, International Business Machines +Copyright (C) 2002-2010, International Business Machines Corporation and * +Copyright (C) 2002-2014, International Business Machines Corporation and others. 
+Copyright (C) 2002-2016, International Business Machines Corporation and +Copyright (C) 2003-2010, International Business Machines +Copyright (C) 2003-2011, International Business Machines Corporation and * +Copyright (C) 2003-2011, International Business Machines Corporation and * +Copyright (C) 2003-2014, International Business Machines Corporation and +Copyright (C) 2003-2014, International Business Machines Corporation and * +Copyright (C) 2003-2015, International Business Machines Corporation and +Copyright (C) 2003-2016, Google, International Business Machines Corporation +Copyright (C) 2003-2016, International Business Machines Corporation and +Copyright (C) 2003-2016, International Business Machines Corporation and * +Copyright (C) 2003-2016, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2004-2006, International Business Machines Corporation and * +Copyright (C) 2004-2009, International Business Machines Corporation and * +Copyright (C) 2004-2009, International Business Machines Corporation and * +Copyright (C) 2004-2010, International Business Machines +Copyright (C) 2004-2010, International Business Machines Corporation and * +Copyright (C) 2004-2014, International Business Machines Corporation and +Copyright (C) 2004-2015, International Business Machines +Copyright (C) 2004-2016, Google Inc, International Business Machines +Copyright (C) 2004-2016, International Business Machines Corporation and +Copyright (C) 2005 - 2012, International Business Machines Corporation and * +Copyright (C) 2004-2016, International Business Machines Corporation and * +Copyright (C) 2005 - 2014, International Business Machines Corporation and * +Copyright (C) 2005-2006, International Business Machines +Copyright (C) 2005-2010, International Business Machines +Copyright (C) 2005-2011, International Business Machines Corporation and * +Copyright (C) 2005-2012, International Business Machines Corporation and * +Copyright (C) 
2005-2012, International Business Machines Corporation and * +Copyright (C) 2005-2013, International Business Machines Corporation and * +Copyright (C) 2005-2015, International Business Machines Corporation and +Copyright (C) 2005-2016 International Business Machines Corporation and +copyright=" Copyright (c) IBM Corporation 1996, 2000. All rights reserved. "; +Copyright IBM Corporation, 1997, 2000, 2005, 2007. All Rights Reserved. */ +Copyright IBM Corporation, 1996-2016. All Rights Reserved. */ +Copyright (c) 2013-2014, International Business Machines +Copyright (c) 2007-2015 International Business Machines Corporation and * +Copyright (c) 2007-2009 International Business Machines Corporation and * +Copyright (c) 2004-2016, International Business Machines +Copyright (c) 2004-2014, International Business Machines +Copyright (c) 2004-2015, International Business Machines +Copyright (C) 2005-2016, International Business Machines Corporation and +Copyright (C) 2005-2016, International Business Machines Corporation and * +Copyright (C) 2006-2009, Google, International Business Machines Corporation * +Copyright (C) 2006-2015, International Business Machines Corporation and +Copyright (C) 2006-2016, Google, International Business Machines Corporation +Copyright (C) 2007, International Business Machines Corporation and * +Copyright (C) 2007-2008, International Business Machines Corporation and * +Copyright (C) 2007-2009, International Business Machines Corporation and * +Copyright (C) 2007-2010, International Business Machines Corporation and * +Copyright (C) 2007-2010, International Business Machines Corporation and * +Copyright (C) 2007-2011, International Business Machines Corporation and * +Copyright (C) 2007-2011, International Business Machines Corporation and others. 
+Copyright (C) 2007-2012, International Business Machines Corporation and * +Copyright (C) 2007-2014, International Business Machines Corporation and * +Copyright (C) 2007-2013, International Business Machines Corporation and * +Copyright (C) 2007-2014, International Business Machines Corporation and * +Copyright (C) 2007-2015, Google Inc, International Business Machines Corporation +Copyright (C) 2007-2015, International Business Machines Corporation and +Copyright (C) 2007-2016, International Business Machines +Copyright (C) 2007-2015, International Business Machines Corporation and * +Copyright (c) 2004-2010, International Business Machines +Copyright (c) 2004-2013, International Business Machines +Copyright (c) 2003-2016, International Business Machines +Copyright (c) 2003-2016 International Business Machines +Copyright (c) 2003-2011, International Business Machines +Copyright (c) 2003-2010, International Business Machines +Copyright (c) 2002-2016, International Business Machines +Copyright (c) 2002-2015, International Business Machines +Copyright (c) 2002-2014, International Business Machines Corporation +Copyright (C) 2007-2016, International Business Machines Corporation and +Copyright (C) 2007-2016, International Business Machines Corporation and * +Copyright (C) 2008-2009, Google, International Business Machines +Copyright (C) 2008-2014, Google, International Business Machines +Copyright (C) 2008-2009, International Business Machines +Copyright (C) 2008-2015, Google, International Business Machines Corporation and +Copyright (C) 2008-2014, International Business Machines Corporation and * +Copyright (C) 2008-2015, International Business Machines Corporation and +Copyright (C) 2008-2016 International Business Machines Corporation +Copyright (C) 2008-2016, Google Inc, International Business Machines Corporation +Copyright (C) 2008-2016, International Business Machines +Copyright (C) 2008-2016, International Business Machines Corporation and +Copyright (C) 
2009 , Yahoo! Inc. * +Copyright (C) 2009, Google, International Business Machines Corporation and * +Copyright (C) 2009, International Business Machines Corporation and * +Copyright (C) 2009, International Business Machines Corporation and * +Copyright (C) 2009,2016 International Business Machines Corporation and +Copyright (C) 2009-2010, Google, International Business Machines Corporation * +Copyright (C) 2009-2010, International Business Machines Corporation and * +Copyright (C) 2009-2011, Google, International Business Machines Corporation +Copyright (C) 2009-2011, International Business Machines Corporation and * +Copyright (C) 2009-2012, International Business Machines Corporation and * +Copyright (C) 2009-2013, International Business Machines Corporation and * +Copyright (C) 2009-2014, International Business Machines +Copyright (C) 2009-2014, International Business Machines Corporation and +Copyright (C) 2009-2014, International Business Machines Corporation and * +Copyright (C) 2009-2015, Google, International Business Machines Corporation +Copyright (C) 2009-2015, International Business Machines Corporation and +Copyright (C) 2009-2015, International Business Machines +Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation +Copyright (C) 2009-2015, International Business Machines Corporation and * +Copyright (C) 2009-2016, International Business Machines +Copyright (C) 2009-2016, International Business Machines Corporation and +Copyright (C) 2009-2016, International Business Machines Corporation and * +Copyright (C) 2009-2016, International Business Machines Corporation, +Copyright (C) 2010, International Business Machines +Copyright (C) 2010, International Business Machines Corporation and * +Copyright (C) 2010-2011, Google, International Business Machines * +Copyright (C) 2010-2014, Google, International Business Machines Corporation * +Copyright (C) 2010-2013, International Business Machines Corporation and * +Copyright (C) 
2010-2014, International Business Machines +Copyright (C) 2010-2015, International Business Machines +Copyright (C) 2010-2016, Google, Inc.; International Business Machines * +Copyright (C) 2010-2016, International Business Machines +Copyright (C) 2011, International Business Machines +Copyright (C) 2011, International Business Machines Corporation and * +Copyright (C) 2011-2014, International Business Machines +Copyright (C) 2011-2016, International Business Machines Corporation and +Copyright (C) 2011-2016, International Business Machines Corporation +Copyright (C) 2011-2016, International Business Machines Corporation and * +Copyright (C) 2012-2014, International Business Machines Corporation and * +Copyright (C) 2012-2014, International Business Machines +Copyright (C) 2012, International Business Machines Corporation and * +Copyright (C) 2012-2015, International Business Machines +Copyright (C) 2012-2015, International Business Machines Corporation and * +Copyright (C) 2012-2016, Google, International Business Machines Corporation and +Copyright (C) 2012-2016, International Business Machines Corporation and +Copyright (C) 2012-2016, International Business Machines +Copyright (C) 2012-2016, International Business Machines Corporation and * +Copyright (C) 2013, Google Inc, International Business Machines Corporation and * +Copyright (C) 2013-2014, International Business Machines +Copyright (C) 2013, International Business Machines Corporation and * +Copyright (C) 2013-2014, International Business Machines Corporation and * +Copyright (C) 2013-2015, International Business Machines +Copyright (C) 2013-2016, International Business Machines Corporation and +Copyright (C) 2013-2015, International Business Machines Corporation and +Copyright (C) 2014, International Business Machines Corporation and +Copyright (C) 2014-2015, International Business Machines Corporation and +Copyright (C) 2014, International Business Machines Corporation and * +Copyright (C) 2014-2016, 
International Business Machines Corporation and +Copyright (C) 2015-2016, International Business Machines Corporation and +Copyright (C) 2015, International Business Machines Corporation and +Copyright (C) 2016, International Business Machines Corporation and +Copyright (C) 2016, International Business Machines Corporation and * +Copyright (c) 2001-2016, International Business Machines +Copyright (c) 2001-2011, International Business Machines +Copyright (c) 2001-2016, International Business Machines Corporation and +Copyright (c) 2002-2007, International Business Machines Corporation +Copyright (c) 2002, International Business Machines Corporation +Copyright (c) 2002-2010, International Business Machines +Copyright (c) 2002-2010, International Business Machines Corporation +Copyright (c) 2002-2011, International Business Machines Corporation +Copyright (c) 2002-2014, Google, International Business Machines diff --git a/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/custom-license b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/custom-license new file mode 100644 index 000000000000..6b1d0bfabc3c --- /dev/null +++ b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/custom-license @@ -0,0 +1 @@ +LICENSE diff --git a/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/files-keep b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/files-keep new file mode 100644 index 000000000000..6b1d0bfabc3c --- /dev/null +++ b/tools/legal-review/Table/com.ibm.icu.icu4j-67.1/files-keep @@ -0,0 +1 @@ +LICENSE diff --git a/tools/legal-review/Table/report-state b/tools/legal-review/Table/report-state index 43a9a3c242a4..78446ef96d09 100644 --- a/tools/legal-review/Table/report-state +++ b/tools/legal-review/Table/report-state @@ -1,3 +1,3 @@ -B39E86F4C9F95AE99476021C49ED7121CA82A4F734D1D1E493F2A00773D753C2 -4CDF1F77633EEBEC5D67B384CCABAF0FC5EBF4BD843FBE86923D72D4EAABB603 +BCA8C55EBEE17E0D943D118A0C87C12B595FA9CB051D8E9B681CE25E881371B4 
+C2918747D187B8302001AFACBFFD37DB381F8B007E54459A02A2A10FE0EF95B5 0