From 453540c74f3265bd7f6365e4d5db387bcd9d5e8c Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Thu, 9 Sep 2021 14:28:51 -0700 Subject: [PATCH 1/3] Remove lower precision time/date types. --- binary/expression.proto | 9 ++-- binary/type.proto | 31 +++----------- site/docs/types/compound_logical_types.md | 6 +-- site/docs/types/physical_types.md | 52 +++++++++++------------ site/docs/types/simple_logical_types.md | 8 ++-- 5 files changed, 38 insertions(+), 68 deletions(-) diff --git a/binary/expression.proto b/binary/expression.proto index e58958095..49cb55136 100644 --- a/binary/expression.proto +++ b/binary/expression.proto @@ -29,11 +29,9 @@ message Expression { double fp64 = 11; string string = 12; bytes binary = 13; - fixed64 timestamp_micro = 14; - fixed64 timestamp_milli = 15; + fixed64 timestamp = 14; fixed32 date = 16; - fixed32 time_micro = 17; - uint32 time_milli = 18; + uint64 time = 17; IntervalYearToMonth interval_year_to_month = 19; IntervalDayToSecond interval_day_to_second = 20; string fixed_char = 21; @@ -42,8 +40,7 @@ message Expression { bytes decimal = 24; Struct struct = 25; Map map = 26; - fixed64 timestamp_micro_tz = 27; - fixed64 timestamp_milli_tz = 28; + fixed64 timestamp_tz = 27; } message Map { diff --git a/binary/type.proto b/binary/type.proto index b7ca0d9dd..6bdde11ac 100644 --- a/binary/type.proto +++ b/binary/type.proto @@ -12,11 +12,9 @@ message Type { FP64 fp64 = 11; String string = 12; Binary binary = 13; - TimestampMicro timestamp_micro = 14; - TimestampMilli timestamp_milli = 15; + Timestamp timestamp = 14; Date time = 16; - TimeMicro time_micro = 17; - TimeMilli time_milli = 18; + Time time = 17; IntervalYear interval_year = 19; IntervalDay interval_day = 20; FixedChar fixed_char = 21; @@ -27,8 +25,7 @@ message Type { NamedStruct named_struct = 26; List list = 27; Map map = 28; - TimestampMicroTZ timestamp_micro_tz = 29; - TimestampMilliTZ timestamp_milli_tz = 30; + TimestampTZ timestamp_tz = 29; UserDefined user_defined = 
31; } @@ -140,15 +137,7 @@ message Type { } } - message TimestampMicro { - PhysicalType physical_type = 1; - - enum PhysicalType { - SYSTEM_DEFAULT = 0; - } - } - - message TimestampMilli { + message Timestamp { PhysicalType physical_type = 1; enum PhysicalType { @@ -164,15 +153,7 @@ message Type { } } - message TimeMicro { - PhysicalType physical_type = 1; - - enum PhysicalType { - SYSTEM_DEFAULT = 0; - } - } - - message TimeMilli { + message Time { PhysicalType physical_type = 1; enum PhysicalType { @@ -302,7 +283,7 @@ message Type { } } - message TimestampMilliTZ { + message TimestampTZ { string timezone = 1; PhysicalType physical_type = 2; diff --git a/site/docs/types/compound_logical_types.md b/site/docs/types/compound_logical_types.md index d1f86388d..1d0f61f64 100644 --- a/site/docs/types/compound_logical_types.md +++ b/site/docs/types/compound_logical_types.md @@ -7,13 +7,11 @@ Compound types include any type that is configurable including complex types as | FIXEDCHAR(L) | A fixed length field of length L. L can be between [1..2,147,483,647]. Values less that are less in length than the length of the field are padded with spaces. | None | None | CharType(L) | CHAR(L) | | VARCHAR(L) | A field that can holds UTF8 encoded strings between 0 and L length. The length of each value can be between [0..2,147,483,647]. The value of L can be between [1..2,147,483,647]. Values shorter than L are not padded. | None | None | VarcharType(L) | VARCHAR(L) | | FIXEDBINARY(L) | A binary field that is fixed in width to L. Values that are shorter than L are 0-byte padded. | FixedSizeBinary<L> | FIXED<L> | - | - | -| DECIMAL(P,S) | Fixed point decimal with precision P and scale S. Precision must be 38 or less. 
| DECIMAL(P,S) | DECIMAL(P,S) | DECIMAL(P,S) | DECIMAL(P,S) | +| DECIMAL(P,S) | A fixed precision decimal value having precision (P, number of digits) <= 38 and Scale (S, number of fractional digist) 0 <= S <= P) | Decimal | DECIMAL(P,S) | DECIMAL(P,S) | DECIMAL(P,S) | | STRUCT<N:T1,...,N:T2> | A struct that maps unique names to value types. Each name is a UTF8 string. Each value can have a distinct type. | struct_<*> | struct<*> | struct<*> | row<*> | | LIST<T> | A list of values of type T. The list can be between [0..2,147,483,647] values in length. Maps to the | list | list | list | array | | MAP<K, V> | An unordered list of type K keys with type V values. | map<k,v> | map<k,v> | - | map<k,v> | -| TIMESTAMP_MICRO_TZ(TZ) | A timestamp in microseconds with a timezone TZ. | timestamp<micro;tz> | timestamptz | - | timestamp(6) with time zone | -| TIMESTAMP_MILLI_TZ(TZ) | A timestamp in microseconds with a timezone TZ. | timestamp<milli;tz> | - | - | timestamp(3) with time zone | -| Decimal(P, S) | A fixed precision decimal value having precision (P, number of digits) <= 38 and Scale (S, number of fractional digist) 0 <= S <= P) | Decimal | decimal(P,S) | decimal(P, S) | DECIMAL(P, S) +| TIMESTAMP_TZ(TZ) | A timestamp with microseconds precision and a type declared timezone TZ. | timestamp<micro;tz> | timestamptz | - | timestamp(6) with time zone | #### Discussion Points diff --git a/site/docs/types/physical_types.md b/site/docs/types/physical_types.md index 1cae933ca..aad6339b3 100644 --- a/site/docs/types/physical_types.md +++ b/site/docs/types/physical_types.md @@ -6,34 +6,30 @@ For each logical type, we declare one or more physical representations of that l In many cases, a system will only have a single physical representation of each type. In those cases, it is expected that the binding of an operation is associated with the system default representation of the data. 
While a physical types are defined as discrete from logical types within the specification, the serialization formats will typically collapse these into a singular concept. -| Logical Type | Physical Representations | Support Dictionary Encoding | -| ------------------ | ----------------------------------------------------------- | --------------------------- | -| boolean | 0=System default | no | -| i8 | 0=System default | no | -| i16 | 0=System default | no | -| i32 | 0=System default | no | -| i64 | 0=System default | no | -| fp32 | 0=System default | no | -| fp64 | 0=System default | no | -| string | 0=System default, 1=Arrow Large String | yes | -| binary | 0=System default, Arrow Large Binary | yes | -| timestamp_micro | 0=System default | no | -| timestamp_milli | 0=System default | no | -| date | 0=System default | no | -| date_micro | 0=System default | no | -| time_micro | 0=System default | no | -| time_milli | 0=System default | no | -| interval_year | 0=System default | no | -| interval_day | 0=System default, 1=Arrow MONTH_DAY_NANO | no | -| fixedchar | 0=System default | yes | -| varchar | 0=System default | yes | -| fixedbinary | 0=System default | yes | -| decimal | 0=System default, 1=Arrow 128 Bit Width | no | -| struct | 0=System default | yes | -| list | 0=System default, 1=Arrow Large List | yes | -| map | 0=System default, 1=Map where keys are utf8 ordered strings | yes | -| timestamp_micro_tz | 0=System default | no | -| timestamp_milli_tz | 0=System default | no | +| Logical Type | Physical Representations | Support Dictionary Encoding | +| ------------- | ----------------------------------------------------------- | --------------------------- | +| boolean | 0=System default | no | +| i8 | 0=System default | no | +| i16 | 0=System default | no | +| i32 | 0=System default | no | +| i64 | 0=System default | no | +| fp32 | 0=System default | no | +| fp64 | 0=System default | no | +| string | 0=System default, 1=Arrow Large String | yes | +| 
binary | 0=System default, 1=Arrow Large Binary | yes | +| timestamp | 0=System default | no | +| date | 0=System default | no | +| time | 0=System default | no | +| interval_year | 0=System default | no | +| interval_day | 0=System default, 1=Arrow MONTH_DAY_NANO | no | +| fixedchar | 0=System default | yes | +| varchar | 0=System default | yes | +| fixedbinary | 0=System default | yes | +| decimal | 0=System default, 1=Arrow 128 Bit Width | no | +| struct | 0=System default | yes | +| list | 0=System default, 1=Arrow Large List | yes | +| map | 0=System default, 1=Map where keys are utf8 ordered strings | yes | +| timestamp_tz | 0=System default | no | diff --git a/site/docs/types/simple_logical_types.md b/site/docs/types/simple_logical_types.md index c61d25ee5..e3bfa4dce 100644 --- a/site/docs/types/simple_logical_types.md +++ b/site/docs/types/simple_logical_types.md @@ -15,11 +15,9 @@ To minimize type explosion, the project currently follows the guideline that a l | fp64 | An 8 byte double precision floating point number with range as defined [here](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). | Float<DOUBLE> | double | DecimalType | double | | string | A string of text that can be up to 2,147,483,647 bytes in length. String is encoded in UTF8 | Utf8 | string | StringType | varchar (no len) | | binary | A binary value that can be up to 2,147,483,647 bytes in length. 
| Binary | binary | BinaryType | Varbinary | -| timestamp_micro | A timestamp with microsecond precision | Timestamp<MICROSECOND> | timestamp | TimestampType | timestamp(6) | -| timestamp_milli | A timestamp with millisecond precision | Timestamp<MILLISECOND> | - | - | timestamp(3) | -| date | Date, expressed as number of seconds since epoch | Date<MILLISECOND> | date | DateType | Date | -| time_micro | A time expressed in microseconds since start of day | Time<MICROSECOND;64> | time | time(6) | time(6) | -| time_milli | A time expressed in milliseconds since start of day | Time<MILLISECOND;32> | - | time(3) | time(3) | +| timestamp | A timestamp with microsecond precision | Timestamp<MICROSECOND> | timestamp | TimestampType | timestamp(6) | +| date | A date | Date<MILLISECOND> | date | DateType | Date | +| time | A time of day with microsecond precision, expressed as microseconds since the beginning of the day. Maximum value is one day. | Time<MICROSECOND;64> | time | time(6) | time(6) | | interval_year | Interval day to month | INTERVAL<YEAR_MONTH> | - | - | Interval year to month | | interval_day | Interval day to second | INTERVAL<DAY_TIME> | - | - | Interval day to second | From 0a20684bbdde13d24e7fe787186e02c4ca5d5e22 Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Thu, 9 Sep 2021 14:56:11 -0700 Subject: [PATCH 2/3] Fix duplicate name. 
--- binary/type.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binary/type.proto b/binary/type.proto index 6bdde11ac..a8052fe29 100644 --- a/binary/type.proto +++ b/binary/type.proto @@ -13,7 +13,7 @@ message Type { String string = 12; Binary binary = 13; Timestamp timestamp = 14; - Date time = 16; + Date date = 16; Time time = 17; IntervalYear interval_year = 19; IntervalDay interval_day = 20; From 3dd9fb8f55ebee243f2c171fb8abc7be191bb6bc Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Thu, 9 Sep 2021 15:49:23 -0700 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Weston Pace --- site/docs/types/compound_logical_types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/docs/types/compound_logical_types.md b/site/docs/types/compound_logical_types.md index 1d0f61f64..31747dda2 100644 --- a/site/docs/types/compound_logical_types.md +++ b/site/docs/types/compound_logical_types.md @@ -7,7 +7,7 @@ Compound types include any type that is configurable including complex types as | FIXEDCHAR(L) | A fixed length field of length L. L can be between [1..2,147,483,647]. Values less that are less in length than the length of the field are padded with spaces. | None | None | CharType(L) | CHAR(L) | | VARCHAR(L) | A field that can holds UTF8 encoded strings between 0 and L length. The length of each value can be between [0..2,147,483,647]. The value of L can be between [1..2,147,483,647]. Values shorter than L are not padded. | None | None | VarcharType(L) | VARCHAR(L) | | FIXEDBINARY(L) | A binary field that is fixed in width to L. Values that are shorter than L are 0-byte padded. 
| FixedSizeBinary<L> | FIXED<L> | - | - | -| DECIMAL(P,S) | A fixed precision decimal value having precision (P, number of digits) <= 38 and Scale (S, number of fractional digist) 0 <= S <= P) | Decimal | DECIMAL(P,S) | DECIMAL(P,S) | DECIMAL(P,S) | +| DECIMAL(P,S) | A fixed-precision decimal value with precision P (number of digits, P <= 38) and scale S (number of fractional digits, 0 <= S <= P). | Decimal<P, S, bitwidth=128> | DECIMAL(P,S) | DECIMAL(P,S) | DECIMAL(P,S) | | STRUCT<N:T1,...,N:T2> | A struct that maps unique names to value types. Each name is a UTF8 string. Each value can have a distinct type. | struct_<*> | struct<*> | struct<*> | row<*> | | LIST<T> | A list of values of type T. The list can be between [0..2,147,483,647] values in length. Maps to the | list | list | list | array | | MAP<K, V> | An unordered list of type K keys with type V values. | map<k,v> | map<k,v> | - | map<k,v> |