Entry point for writing tables (#1946)

enso-org · Aug 12, 2021 · ad0b677 · ad0b677
1 parent 7c45b92
commit ad0b677
Show file tree

Hide file tree

Showing 7 changed files with 174 additions and 0 deletions.
diff --git a/RELEASES.md b/RELEASES.md
@@ -23,6 +23,8 @@
   ([#1937](https://github.com/enso-org/enso/pull/1937)).
 - Added support for parsing CSV files with too many headers declared
   ([#1942](https://github.com/enso-org/enso/pull/1942)).
+- Added a single entry point method for all table serialization modes
+  ([#1946](https://github.com/enso-org/enso/pull/1946)).
 
 # Enso 0.2.23 (2021-08-09)
 

diff --git a/distribution/lib/Standard/Table/0.1.0/src/Data/Table.enso b/distribution/lib/Standard/Table/0.1.0/src/Data/Table.enso
@@ -6,6 +6,7 @@ import Standard.Table.Io.Csv
 import Standard.Visualization
 import Standard.Base.Data.Time.Date
 import Standard.Table.Io.Spreadsheet_Write_Mode
+import Standard.Table.Io.Format
 
 from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
 
@@ -861,6 +862,46 @@ type Table
     write_json : File.File -> Nothing
     write_json file = file.write this.to_json.to_text
 
+    ## UNSTABLE
+
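+       Writes the table to a specified file with the given serialization
+       settings, delegating to `write_csv`, `write_xlsx` or `write_json`.
+
+       Arguments:
+       - file: the file to write to.
+       - format: the format settings to use; one of `Format.Csv`,
+         `Format.Xlsx` or `Format.Json`.
+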
+       > Example
+         Write a table to a CSV file, without writing the header.
+
+             import Standard.Examples
+             import Table
+
+             example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
+
+       > Example
+         Write a table to an XLSX file, without writing the header.
+
+             import Standard.Examples
+             import Table
+
+             example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
+
+       > Example
+         Write a table to a JSON file.
+
+             import Standard.Examples
+             import Table
+
+             example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
+    write : File.File -> Format.Format -> Nothing
+    write file format = case format of
+        Format.Csv header quote sep line max -> this.write_csv file header quote sep line max
+        Format.Xlsx sheet mode header max -> this.write_xlsx file sheet mode header max
+        Format.Json -> this.write_json file
+
+
 ## UNSTABLE
 
    Used for converting arbitrary values into fields in CSV files.

diff --git a/distribution/lib/Standard/Table/0.1.0/src/Io/Format.enso b/distribution/lib/Standard/Table/0.1.0/src/Io/Format.enso
@@ -0,0 +1,86 @@
+from Standard.Base import all
+import Standard.Table.Io.Spreadsheet_Write_Mode
+
+## Specifies the different output formats for serializing tables.
+type Format
+
+    ## UNSTABLE
+
+       Specifies the CSV output format settings.
+
+       Arguments:
+       - include_header: Specifies whether the first line of generated CSV
+         should contain the column names.
+       - always_quote: Specifies whether all fields in the resulting CSV should
+         be quoted. When this is set to `False`, only the fields containing the
+         `separator` in their contents will be quoted.
+       - separator: a sequence used to separate fields within a single row.
+       - line_ending: the style of line-endings to use in the generated CSV.
+       - max_rows_per_file: specifies the maximum number of rows that can be
+         written to a single file. If this option is set, instead of writing the
+         contents directly to a file, its name is parsed and a numbered series
+         of files with names based on it is written to instead. For example,
+         if the file is `~/my_data/output.csv`, the table contains 250 rows, and
+         `max_rows_per_file` is set to `100`, 3 different files will be written:
+         - `~/my_data/output_1.csv`, containing rows 0 through 99;
+         - `~/my_data/output_2.csv`, containing rows 100 through 199;
+         - `~/my_data/output_3.csv`, containing rows 200 through 249.
+
+       > Example
+         Write a table to a CSV file, without writing the header.
+
+             import Standard.Examples
+             import Table
+
+             example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
+    type Csv include_header=True always_quote=False separator=',' line_ending=Line_Ending_Style.Unix max_rows_per_file=Nothing
+
+    ## UNSTABLE
+
+       Specifies XLSX format settings.
+
+       Arguments:
+       - sheet: the name of the sheet to use for writing the data.
+       - write_mode: specifies the write behavior if the specified file and
+         sheet already exist. Can be one of:
+         - Spreadsheet_Write_Mode.Create: this is the default value. This
+           setting will create a new sheet in the file, with a name chosen such
+           that the clash is avoided.
+         - Spreadsheet_Write_Mode.Overwrite: will result in removing all
+           contents of the existing sheet and replacing it with the new data.
+         - Spreadsheet_Write_Mode.Append: will append this data to the existing
+           sheet, such that the new data starts after the last row containing
+           any data.
+       - include_header: Specifies whether the first row written to the
+         sheet should contain the column names.
+       - max_rows_per_file: specifies the maximum number of rows that can be
+         written to a single file. If this option is set, instead of writing the
+         contents directly to the file, its name is parsed and a numbered series
+         of files with names based on it is written to instead. For example, if
+         the file is `~/my_data/output.xlsx`, the table contains 250 rows, and
+         `max_rows_per_file` is set to `100`, 3 different files will be written:
+         - `~/my_data/output_1.xlsx`, containing rows 0 through 99;
+         - `~/my_data/output_2.xlsx`, containing rows 100 through 199;
+         - `~/my_data/output_3.xlsx`, containing rows 200 through 249.
+
+       > Example
+         Write a table to an XLSX file, without writing the header.
+
+             import Standard.Examples
+             import Table
+
+             example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
+    type Xlsx sheet='Data' write_mode=Spreadsheet_Write_Mode.Create include_header=True max_rows_per_file=Nothing
+
+    ## UNSTABLE
+
+       Specifies that the table should be written to a JSON file.
+
+       > Example
+         Write a table to a JSON file.
+
+             import Standard.Examples
+             import Table
+
+             example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
+    type Json
diff --git a/distribution/lib/Standard/Table/0.1.0/src/Main.enso b/distribution/lib/Standard/Table/0.1.0/src/Main.enso
@@ -2,6 +2,7 @@ from Standard.Base import all
 
 import Standard.Geo.Geo_Json
 import Standard.Table.Io.Csv
+import Standard.Table.Io.Format
 import Standard.Table.Io.Spreadsheet
 import Standard.Table.Io.Spreadsheet_Write_Mode
 import Standard.Table.Data.Table
@@ -11,6 +12,7 @@ import Standard.Table.Data.Order_Rule
 from Standard.Table.Io.Csv export all hiding Parser
 from Standard.Table.Io.Spreadsheet export all hiding Reader
 
+export Standard.Table.Io.Format
 export Standard.Table.Io.Spreadsheet_Write_Mode
 export Standard.Table.Data.Column
 

diff --git a/test/Table_Tests/src/Csv_Spec.enso b/test/Table_Tests/src/Csv_Spec.enso
@@ -144,3 +144,32 @@ spec =
             out_1.delete_if_exists
             out_2.delete_if_exists
             out_3.delete_if_exists
+
+        Test.specify 'should be possible through the write method' <|
+            varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
+            out = Enso_Project.data / 'out.csv'
+            out_1 = Enso_Project.data / 'out_1.csv'
+            out_2 = Enso_Project.data / 'out_2.csv'
+            out_3 = Enso_Project.data / 'out_3.csv'
+            out_1.delete_if_exists
+            out_2.delete_if_exists
+            out_3.delete_if_exists
+            varied_column.write out (Table.Format.Csv include_header=False separator=';' max_rows_per_file=3)
+            exp_1 = '''
+                2005-02-25;2005-02-25;1;1;1.0;1
+                2005-02-28;2005-02-28;2;2;2.0;2
+                4;2005-03-01;3;3;3.0;3\n
+            exp_2 = '''
+                2005-03-02;;4;4;4.0;4
+                ;2005-03-03;5;5;5.0;5
+                2005-03-04;2005-03-04;;6;6.25;6.25\n
+            exp_3 = '''
+                2005-03-07;2005-03-07;7;7;7.0;7
+                2005-03-08;2005-03-08;8;8;8.0;osiem\n
+            out_1.read.should_equal exp_1
+            out_2.read.should_equal exp_2
+            out_3.read.should_equal exp_3
+            out_1.delete_if_exists
+            out_2.delete_if_exists
+            out_3.delete_if_exists
+
diff --git a/test/Table_Tests/src/Json_Spec.enso b/test/Table_Tests/src/Json_Spec.enso
@@ -19,3 +19,9 @@ spec = Test.group 'JSON conversion' <|
         (Json.parse out.read).to_table ['a', 'b', 'c'] . should_equal simple_empty
         out.delete_if_exists
 
+    Test.specify 'Should write JSON tables to disk using the write method' <|
+        out = Enso_Project.data / 'out.json'
+        out.delete_if_exists
+        simple_empty.write out Table.Format.Json
+        (Json.parse out.read).to_table ['a', 'b', 'c'] . should_equal simple_empty
+        out.delete_if_exists
diff --git a/test/Table_Tests/src/Spreadsheet_Spec.enso b/test/Table_Tests/src/Spreadsheet_Spec.enso
@@ -78,6 +78,14 @@ spec =
             read . should_equal (clothes.concat clothes)
             out.delete_if_exists
 
+        Test.specify 'should allow writing using the generic write method' <|
+            out.delete_if_exists
+            clothes.write out (Table.Format.Xlsx sheet='Foo')
+            clothes.write out (Table.Format.Xlsx sheet='Foo' write_mode=Table.Spreadsheet_Write_Mode.Append include_header=False)
+            read = out.read_xlsx sheet='Foo'
+            read . should_equal (clothes.concat clothes)
+            out.delete_if_exists
+
         Test.specify 'should write multiple files if row limit is specified' <|
             out_1 = Enso_Project.data / 'out_1.xlsx'
             out_2 = Enso_Project.data / 'out_2.xlsx'