From 333a591b4c1ad1b30fc404e622c8b08968a211db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 11:19:36 +0200 Subject: [PATCH 01/14] missing CR from previous PR --- distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index ad9fcbeea235..4b647638eab7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -12,6 +12,13 @@ polyglot java import org.enso.table.error.EmptySheetException type Missing_Input_Columns ## PRIVATE One or more columns not found in the input table. + + Arguments: + - criteria: the names of the columns or regular expressions that did not + have any matches. + - where: an optional text describing to which object this error is + related (for example in join, whether the reported error is for the + left or right table). 
Error (criteria : [Text]) (where:Text|Nothing = Nothing) ## PRIVATE From c2673347e59d3f2c64b8a812852101be0297539d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 16:09:50 +0200 Subject: [PATCH 02/14] better handling for primary key error --- .../Database/0.0.0-dev/src/Errors.enso | 8 ++++-- .../src/Extensions/Upload_Table.enso | 28 ++++++++++++++++--- .../Table/0.0.0-dev/src/Data/Table.enso | 15 ++++++++++ .../Table_Tests/src/Database/Upload_Spec.enso | 8 ++++++ 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso index e39dc95c35fb..e482a6833229 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso @@ -155,10 +155,14 @@ type Non_Unique_Primary_Key Arguments: - primary_key: The primary key that is not unique. - Error (primary_key : Vector Text) + - clashing_primary_key: The values of an example key that corresponds to + more than one row. + - clashing_example_row_count: The number of rows that correspond to the + example key. + Error (primary_key : Vector Text) (clashing_primary_key : Vector Any) (clashing_example_row_count : Integer) ## PRIVATE Pretty print the non-unique primary key error. to_display_text : Text to_display_text self = - "The primary key " + self.primary_key.to_display_text + " is not unique." + "The primary key " + self.primary_key.to_display_text + " is not unique. The key "+clashing_primary_key.to_display_text+" corresponds to "+clashing_example_row_count.to_text+" rows." 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso index dedd832ffd53..93fa8bdabb85 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso @@ -62,7 +62,7 @@ In_Memory_Table.create_database_table self connection table_name=Nothing primary continue. Otherwise, they could 'leak' to `Panic.rethrow` and be wrongly raised as panics. upload_status = create_table_statement.if_not_error <| - translate_known_upload_errors connection resolved_primary_key <| + translate_known_upload_errors self connection resolved_primary_key <| connection.jdbc_connection.run_within_transaction <| Panic.rethrow <| connection.execute_update create_table_statement if structure_only.not then @@ -119,7 +119,7 @@ Database_Table.create_database_table self connection table_name=Nothing primary_ Error.throw (Unsupported_Database_Operation.Error "The Database table to be uploaded must be coming from the same connection as the connection on which the new table is being created. Cross-connection uploads are currently not supported. To work around this, you can first `.read` the table into memory and then upload it from memory to a different connection.") upload_status = connection_check.if_not_error <| create_table_statement.if_not_error <| - translate_known_upload_errors connection resolved_primary_key <| + translate_known_upload_errors self connection resolved_primary_key <| connection.jdbc_connection.run_within_transaction <| Panic.rethrow <| connection.execute_update create_table_statement if structure_only.not then @@ -144,15 +144,35 @@ resolve_primary_key table primary_key = case primary_key of ## PRIVATE Inspects any `SQL_Error` thrown and replaces it with a more precise error type when available. 
-translate_known_upload_errors connection primary_key ~action = +translate_known_upload_errors source_table connection primary_key ~action = handler caught_panic = error_mapper = connection.dialect.get_error_mapper sql_error = caught_panic.payload case error_mapper.is_primary_key_violation sql_error of - True -> Error.throw (Non_Unique_Primary_Key.Error primary_key) + True -> raise_duplicated_primary_key_error source_table primary_key False -> Panic.throw caught_panic Panic.catch SQL_Error action handler +## PRIVATE + Creates a `Non_Unique_Primary_Key` error containing information about an + example group violating the uniqueness constraint. +raise_duplicated_primary_key_error source_table primary_key original_panic = + agg = source_table.aggregate [Aggregate_Column.Count]+(primary_key.map Aggregate_Column.Group_By) + filtered = agg.filter column=0 (Filter_Condition.Greater than=1) + materialized = filtered.read max_rows=1 + case materialized.row_count == 0 of + ## If we couldn't find a duplicated key, we give up the translation and + rethrow the original panic containing the SQL error. This could + happen if the constraint violation is on some non-trivial key, like + case insensitive. + True -> Panic.throw original_panic + False -> + row = materialized.first_row + example_count = row.first + example_entry = row.drop 1 + Error.throw (Non_Unique_Primary_Key.Error primary_key example_entry example_count) + + ## PRIVATE Creates a statement that will create a table with structure determined by the provided columns. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 442100065529..5af03201ac47 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1660,6 +1660,21 @@ type Table row_count : Integer row_count self = self.java_table.rowCount + ## Returns a materialized dataframe containing rows of this table. + + In the in-memory backend, this returns the same table, truncated to + `max_rows`. This is only kept for API compatibility between database and + in-memory tables. The `read` operation can be used to ensure that the + table is now in-memory, regardless of its origin. + + Arguments: + - max_rows: specifies a maximum amount of rows to fetch; if not set, all + available rows are fetched. + read : (Integer | Nothing) -> Materialized_Table + read self max_rows=Nothing = case max_rows of + Nothing -> self + _ : Integer -> self.take (First max_rows) + ## Returns a Table describing this table's contents. The table lists all columns, counts of non-null items and value types of diff --git a/test/Table_Tests/src/Database/Upload_Spec.enso b/test/Table_Tests/src/Database/Upload_Spec.enso index 57ec8feb1073..69fabf3777c4 100644 --- a/test/Table_Tests/src/Database/Upload_Spec.enso +++ b/test/Table_Tests/src/Database/Upload_Spec.enso @@ -104,14 +104,22 @@ spec make_new_connection prefix persistent_connector=True = t1 = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]] r1 = t1.create_database_table connection (Name_Generator.random_name "primary-key-6") temporary=True primary_key=["X"] r1.should_fail_with Non_Unique_Primary_Key + e1 = r1.catch + e1.clashing_primary_key . should_equal [1] + e1.clashing_example_row_count . should_equal 2 + e1.to_display_text . should_equal "The primary key [X] is not unique. The key [1] corresponds to 2 rows." 
+ r2 = t1.create_database_table connection (Name_Generator.random_name "primary-key-6") temporary=True primary_key=["Y"] r2.should_fail_with Non_Unique_Primary_Key + r2.catch . clashing_primary_key . should_equal ['b'] + r3 = t1.create_database_table connection (Name_Generator.random_name "primary-key-7") temporary=True primary_key=["X", "Y"] r3.at "X" . to_vector . should_equal [1, 2, 1] t2 = Table.new [["X", [1, 2, 1]], ["Y", ['a', 'b', 'a']]] r4 = t2.create_database_table connection (Name_Generator.random_name "primary-key-7") temporary=True primary_key=["X", "Y"] r4.should_fail_with Non_Unique_Primary_Key + r4.catch . clashing_primary_key . should_equal [1, 'a'] Test.group prefix+"Persisting a Database Table (query)" <| Test.specify "should be able to create a persistent copy of a DB table" <| From fe61a898fbad8fe53b303ab330b6e7f632b603b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 16:27:25 +0200 Subject: [PATCH 03/14] fixes --- distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso | 4 ++++ distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso | 2 +- .../Database/0.0.0-dev/src/Extensions/Upload_Table.enso | 4 ++-- distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso | 2 +- test/Tests/src/Data/Vector_Spec.enso | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso index 9fc87fa0c070..656270126a5f 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso @@ -618,6 +618,10 @@ type Vector a to_text : Text to_text self = self.map .to_text . 
join ", " "[" "]" + ## PRIVATE + to_display_text : Text + to_display_text self = self.short_display_text max_entries=40 + ## PRIVATE ADVANCED diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso index e482a6833229..a12bca5a15df 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Errors.enso @@ -165,4 +165,4 @@ type Non_Unique_Primary_Key Pretty print the non-unique primary key error. to_display_text : Text to_display_text self = - "The primary key " + self.primary_key.to_display_text + " is not unique. The key "+clashing_primary_key.to_display_text+" corresponds to "+clashing_example_row_count.to_text+" rows." + "The primary key " + self.primary_key.to_display_text + " is not unique. The key "+self.clashing_primary_key.to_display_text+" corresponds to "+self.clashing_example_row_count.to_text+" rows." diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso index 93fa8bdabb85..7f2130e0f416 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Table.enso @@ -149,7 +149,7 @@ translate_known_upload_errors source_table connection primary_key ~action = error_mapper = connection.dialect.get_error_mapper sql_error = caught_panic.payload case error_mapper.is_primary_key_violation sql_error of - True -> raise_duplicated_primary_key_error source_table primary_key + True -> raise_duplicated_primary_key_error source_table primary_key caught_panic False -> Panic.throw caught_panic Panic.catch SQL_Error action handler @@ -167,7 +167,7 @@ raise_duplicated_primary_key_error source_table primary_key original_panic = case insensitive. 
True -> Panic.throw original_panic False -> - row = materialized.first_row + row = materialized.first_row.to_vector example_count = row.first example_entry = row.drop 1 Error.throw (Non_Unique_Primary_Key.Error primary_key example_entry example_count) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 5af03201ac47..936bee0f591c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1670,7 +1670,7 @@ type Table Arguments: - max_rows: specifies a maximum amount of rows to fetch; if not set, all available rows are fetched. - read : (Integer | Nothing) -> Materialized_Table + read : (Integer | Nothing) -> Table read self max_rows=Nothing = case max_rows of Nothing -> self _ : Integer -> self.take (First max_rows) diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso index 5d4a72905abe..8ba859377564 100644 --- a/test/Tests/src/Data/Vector_Spec.enso +++ b/test/Tests/src/Data/Vector_Spec.enso @@ -346,6 +346,8 @@ type_spec name alter = Test.group name <| alter [1, 2, 3, 4, 5, 6] . short_display_text max_entries=3 . should_equal "[1, 2, 3 and 3 more elements]" alter (0.up_to 100).to_vector . short_display_text max_entries=2 . should_equal "[0, 1 and 98 more elements]" + alter [1, 2] . to_display_text . should_equal "[1, 2]" + alter [] . short_display_text max_entries=0 . 
should_fail_with Illegal_Argument Test.specify "should define equality" <| From 6f58eae15433bbef11fb1a10eb3428112a4da7f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 16:31:26 +0200 Subject: [PATCH 04/14] better test --- test/Table_Tests/src/Database/Upload_Spec.enso | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/Table_Tests/src/Database/Upload_Spec.enso b/test/Table_Tests/src/Database/Upload_Spec.enso index 69fabf3777c4..e3985e1f2d54 100644 --- a/test/Table_Tests/src/Database/Upload_Spec.enso +++ b/test/Table_Tests/src/Database/Upload_Spec.enso @@ -201,6 +201,22 @@ spec make_new_connection prefix persistent_connector=True = r1 = db_table.create_database_table connection (Name_Generator.random_name "copied-table") temporary=True primary_key=["X"] r1.should_fail_with Non_Unique_Primary_Key + e1 = r1.catch + e1.clashing_primary_key . should_equal [1] + e1.clashing_example_row_count . should_equal 2 + + t2 = Table.new [["X", [1, 3, 1, 2, 3, 2, 2, 2, 0]], ["Y", ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']]] + db_table_2 = t2.create_database_table connection (Name_Generator.random_name "source-table-2") temporary=True primary_key=Nothing + Problems.assume_no_problems db_table_2 + + r2 = db_table_2.create_database_table connection (Name_Generator.random_name "copied-table-2") temporary=True primary_key=["X"] + r2.should_fail_with Non_Unique_Primary_Key + e2 = r2.catch + e2.clashing_primary_key.length . should_equal 1 + x = e2.clashing_primary_key.first + [1, 2, 3].should_contain x + counts = Map.from_vector [[1, 2], [2, 4], [3, 2]] + e2.clashing_example_row_count . 
should_equal (counts.at x) Test.specify "will not allow to upload tables across connections" <| t = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]] From 58a03faf4e7f22ad81b5e716b566a9a9d94f8a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 16:36:32 +0200 Subject: [PATCH 05/14] make sure the file exists before trying to read it --- distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index 47a26123ffba..cb0cd4df7f26 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -245,7 +245,8 @@ type File @format format_widget read : File_Format -> Problem_Behavior -> Any ! File_Error read self format=Auto_Detect (on_problems=Problem_Behavior.Report_Warning) = - format.read self on_problems + if self.exists.not then Error.throw (File_Error.Not_Found self) else + format.read self on_problems ## ALIAS Load Bytes, Open Bytes Reads all bytes in this file into a byte vector. From e81fc3c2eb614dff3432bdad45ac62a8234af48c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 16:39:38 +0200 Subject: [PATCH 06/14] test --- test/Tests/src/System/File_Read_Spec.enso | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/Tests/src/System/File_Read_Spec.enso b/test/Tests/src/System/File_Read_Spec.enso index 71b7ec1f5b62..51b144edd6fa 100644 --- a/test/Tests/src/System/File_Read_Spec.enso +++ b/test/Tests/src/System/File_Read_Spec.enso @@ -20,6 +20,11 @@ spec = content = sample_txt.read content.should_equal "Hello World!" 
+ Test.specify "should raise an not-found error when reading a nonexistent file even of unknown format" <| + r1 = (File.new "nonexistent.file.of.weird-format").read + r1.should_fail_with File_Error + r1.catch.should_be_a File_Error.Not_Found + Test.group "Bytes" <| Test.specify "should be able to read a file as Bytes" <| bytes = sample_xxx.read Bytes From 2801f856b7b29cd4fc3e1f3fe71e4141055d3d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 17:02:57 +0200 Subject: [PATCH 07/14] update sqlite tests --- .../Table_Tests/src/Database/SQLite_Spec.enso | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index 9702096aa2ee..47b25dd43433 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -154,7 +154,7 @@ sqlite_spec connection prefix = spec = enso_project.data.create_directory - file = enso_project.data / "sqlite_test.db" + file = enso_project.data / "transient" / "sqlite_test.db" file.delete_if_exists in_file_prefix = "[SQLite File] " sqlite_spec (Database.connect (SQLite file)) in_file_prefix @@ -174,11 +174,24 @@ spec = connection.execute_update 'CREATE TABLE "Dummy" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)' connection.close - Test.specify "should recognise a db file" <| - Auto_Detect.get_format (enso_project.data / "data.db") . should_be_a SQLite_Format + Test.specify "should recognise a SQLite database file" <| + Auto_Detect.get_format file . should_be_a SQLite_Format - Test.specify "should recognise a sqlite file" <| - Auto_Detect.get_format (enso_project.data / "data.sqlite") . should_be_a SQLite_Format + Test.specify "should recognise a sqlite file by extension fro writing" <| + Auto_Detect.get_format (enso_project.data / "nonexistent-data.db") . 
should_be_a SQLite_Format + Auto_Detect.get_format (enso_project.data / "nonexistent-data.sqlite") . should_be_a SQLite_Format + + Test.specify "should not recognise nonexistent or empty files for reading" <| + r1 = Data.read (enso_project.data / "nonexistent-data.db") + r1.should_fail_with File_Error + r1.catch . should_be_a File_Error.Not_Found + + empty = enso_project.data / "transient" / "empty-data.db" + "".write empty on_existing_file=Existing_File_Behavior.Overwrite . should_succeed + r2 = Data.read empty + r2.should_fail_with File_Error + r2.catch . should_be_a File_Error.Unsupported_Type + empty.delete_if_exists Test.specify "should connect to a db file" <| connection = Data.read file From c65979a41af48904f99538f8a27968b8ec2b6c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 17:33:27 +0200 Subject: [PATCH 08/14] restructure file formats into read|write, allowing to use file contents in read mode --- .../Base/0.0.0-dev/src/System/File.enso | 8 ++++ .../0.0.0-dev/src/System/File_Format.enso | 47 +++++++++++++++---- .../src/Connection/SQLite_Format.enso | 15 +++++- .../0.0.0-dev/src/Image_File_Format.enso | 9 +++- .../Table/0.0.0-dev/src/Data/Table.enso | 2 +- .../src/Delimited/Delimited_Format.enso | 9 +++- .../0.0.0-dev/src/Excel/Excel_Format.enso | 9 +++- .../src/Image_Read_Write_Spec.enso | 6 +-- .../Table_Tests/src/Database/SQLite_Spec.enso | 6 +-- 9 files changed, 86 insertions(+), 25 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index cb0cd4df7f26..20e6bac5eb56 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -613,6 +613,14 @@ type File resource = Managed_Resource.register stream close_stream Input_Stream.Value self resource + ## PRIVATE + Reads first `n` bytes from the file (or less if the file is too 
small) + and returns a vector of bytes. + read_first_bytes : Integer -> Vector ! File_Error + read_first_bytes self n = + opts = [File_Access.Read] + self.with_input_stream opts (_.read_n_bytes n) + ## PRIVATE Reads last `n` bytes from the file (or less if the file is too small) and returns a vector of bytes. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso index a92f9aff7d2c..41a38ab188bd 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso @@ -58,14 +58,26 @@ type Auto_Detect Implements the `File.read` for this `File_Format` read : File -> Problem_Behavior -> Any ! File_Error read self file on_problems = - reader = Auto_Detect.get_format file + reader = Auto_Detect.get_reading_format file if reader == Nothing then Error.throw (File_Error.Unsupported_Type file) else reader.read file on_problems ## PRIVATE - get_format : File -> Any | Nothing - get_format file = - get_format f-> f.for_file file + Finds a matching format for reading the file. + + It assumes that `file` already exists. + get_reading_format : File -> Any | Nothing + get_reading_format file = + get_format f-> f.for_file_read file + + ## PRIVATE + Finds a matching format for writing the file. + + It may not assume that the `file` exists, so it must only rely on the + file path (extension in particular), but not the contents. + get_writing_format : File -> Any | Nothing + get_writing_format file = + get_format f-> f.for_file_write file ## PRIVATE get_web_parser : Text -> URI -> Any | Nothing @@ -91,13 +103,18 @@ type Plain_Text_Format ## PRIVATE If the File_Format supports reading from the file, return a configured instance. 
- for_file : File -> Plain_Text_Format | Nothing - for_file file = + for_file_read : File -> Plain_Text_Format | Nothing + for_file_read file = case file.extension of ".txt" -> Plain_Text_Format.Plain_Text ".log" -> Plain_Text_Format.Plain_Text _ -> Nothing + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> Plain_Text_Format | Nothing + for_file_write file = Plain_Text_Format.for_file_read file + ## PRIVATE If the File_Format supports reading from the web response, return a configured instance. for_web : Text -> URI -> Plain_Text_Format | Nothing @@ -127,12 +144,17 @@ type Plain_Text_Format type Bytes ## PRIVATE If the File_Format supports reading from the file, return a configured instance. - for_file : File -> Bytes | Nothing - for_file file = + for_file_read : File -> Bytes | Nothing + for_file_read file = case file.extension of ".dat" -> Bytes _ -> Nothing + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> Bytes | Nothing + for_file_write file = Bytes.for_file_read file + ## PRIVATE If the File_Format supports reading from the web response, return a configured instance. As `Bytes`, does not support reading from the web returns `Nothing`. @@ -148,13 +170,18 @@ type Bytes type JSON_Format ## PRIVATE If the File_Format supports reading from the file, return a configured instance. - for_file : File -> JSON_Format | Nothing - for_file file = + for_file_read : File -> JSON_Format | Nothing + for_file_read file = case file.extension of ".json" -> JSON_Format ".geojson" -> JSON_Format _ -> Nothing + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> JSON_Format | Nothing + for_file_write file = JSON_Format.for_file_read file + ## PRIVATE If the File_Format supports reading from the web response, return a configured instance. 
for_web : Text -> URI -> JSON_Format | Nothing diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso index 05b46d7796fe..97d0e6bcdc63 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso @@ -11,8 +11,15 @@ type SQLite_Format ## PRIVATE If the File_Format supports reading from the file, return a configured instance. - for_file : File -> SQLite_Format | Nothing - for_file file = + for_file_read : File -> SQLite_Format | Nothing + for_file_read file = + expected_header = magic_header_string + got_header = file.read_first_bytes header.length + + ## PRIVATE + If the File_Format supports writing to the file, return a configured instance. + for_file_write : File -> SQLite_Format | Nothing + for_file_write file = case file.extension of ".db" -> SQLite_Format.For_File ".sqlite" -> SQLite_Format.For_File @@ -31,3 +38,7 @@ type SQLite_Format read self file on_problems = _ = [on_problems] Database.connect (SQLite_Details.SQLite file) + +## PRIVATE +magic_header_string = + "SQLite format 3".utf_8 + [0] diff --git a/distribution/lib/Standard/Image/0.0.0-dev/src/Image_File_Format.enso b/distribution/lib/Standard/Image/0.0.0-dev/src/Image_File_Format.enso index d5e80347ff50..80acec43a217 100644 --- a/distribution/lib/Standard/Image/0.0.0-dev/src/Image_File_Format.enso +++ b/distribution/lib/Standard/Image/0.0.0-dev/src/Image_File_Format.enso @@ -14,11 +14,16 @@ type Image_File_Format ## PRIVATE If the File_Format supports reading from the file, return a configured instance. 
- for_file : File -> Image_File_Format | Nothing - for_file file = + for_file_read : File -> Image_File_Format | Nothing + for_file_read file = extension = file.extension if supported.contains extension then Image_File_Format.For_File else Nothing + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> Image_File_Format | Nothing + for_file_write file = Image_File_Format.for_file_read file + ## PRIVATE If the File_Format supports reading from the web response, return a configured instance. for_web : Text -> URI -> Image_File_Format | Nothing diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 936bee0f591c..ec9f272aa79f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1928,7 +1928,7 @@ type Table file = File.new path case format of _ : Auto_Detect -> - base_format = format.get_format file + base_format = format.get_writing_format file if base_format == Nothing then Error.throw (File_Error.Unsupported_Output_Type file Table) else self.write file format=base_format on_existing_file match_columns on_problems _ -> diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso index 67910e938f2f..0b2f35252bd6 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso @@ -54,14 +54,19 @@ type Delimited_Format ## PRIVATE ADVANCED If the File_Format supports reading from the file, return a configured instance. 
- for_file : File -> Delimited_Format | Nothing - for_file file = + for_file_read : File -> Delimited_Format | Nothing + for_file_read file = case file.extension of ".csv" -> Delimited_Format.Delimited ',' ".tab" -> Delimited_Format.Delimited '\t' ".tsv" -> Delimited_Format.Delimited '\t' _ -> Nothing + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> Delimited_Format | Nothing + for_file_write file = Delimited_Format.for_file_read file + ## PRIVATE ADVANCED If the File_Format supports reading from the web response, return a configured instance. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso index 84c75121f3bf..4d7c6adc44d6 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso @@ -48,12 +48,17 @@ type Excel_Format ## PRIVATE ADVANCED If the File_Format supports reading from the file, return a configured instance. - for_file : File -> Excel_Format | Nothing - for_file file = + for_file_read : File -> Excel_Format | Nothing + for_file_read file = is_xls = should_treat_as_xls_format Infer file if is_xls.is_error then Nothing else Excel_Format.Excel xls_format=is_xls + ## PRIVATE + If this File_Format should be used for writing to that file, return a configured instance. + for_file_write : File -> Excel_Format | Nothing + for_file_write file = Excel_Format.for_file_read file + ## PRIVATE ADVANCED If the File_Format supports reading from the web response, return a configured instance. 
diff --git a/test/Image_Tests/src/Image_Read_Write_Spec.enso b/test/Image_Tests/src/Image_Read_Write_Spec.enso index 273c9ecb22fe..b69eb09695c5 100644 --- a/test/Image_Tests/src/Image_Read_Write_Spec.enso +++ b/test/Image_Tests/src/Image_Read_Write_Spec.enso @@ -69,9 +69,9 @@ spec = Test.group "Image File_Format" <| Test.specify "should recognise image files" <| - Auto_Detect.get_format (enso_project.data / "data.jpg") . should_be_a Image_File_Format - Auto_Detect.get_format (enso_project.data / "data.png") . should_be_a Image_File_Format - Auto_Detect.get_format (enso_project.data / "data.bmp") . should_be_a Image_File_Format + Auto_Detect.get_reading_format (enso_project.data / "data.jpg") . should_be_a Image_File_Format + Auto_Detect.get_reading_format (enso_project.data / "data.png") . should_be_a Image_File_Format + Auto_Detect.get_reading_format (enso_project.data / "data.bmp") . should_be_a Image_File_Format Test.specify "should allow reading an Image" <| img = Data.read rgba_file diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index 47b25dd43433..f12424b9078a 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -175,11 +175,11 @@ spec = connection.close Test.specify "should recognise a SQLite database file" <| - Auto_Detect.get_format file . should_be_a SQLite_Format + Auto_Detect.get_reading_format file . should_be_a SQLite_Format Test.specify "should recognise a sqlite file by extension fro writing" <| - Auto_Detect.get_format (enso_project.data / "nonexistent-data.db") . should_be_a SQLite_Format - Auto_Detect.get_format (enso_project.data / "nonexistent-data.sqlite") . should_be_a SQLite_Format + Auto_Detect.get_reading_format (enso_project.data / "nonexistent-data.db") . should_be_a SQLite_Format + Auto_Detect.get_reading_format (enso_project.data / "nonexistent-data.sqlite") . 
should_be_a SQLite_Format Test.specify "should not recognise nonexistent or empty files for reading" <| r1 = Data.read (enso_project.data / "nonexistent-data.db") From 81530c8694281aa0cdfc655b4d6744e1e69e14ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 17:35:46 +0200 Subject: [PATCH 09/14] missing method --- .../lib/Standard/Base/0.0.0-dev/src/Data/Array.enso | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso index e5d742c9d493..70a5e1668893 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso @@ -438,6 +438,8 @@ type Array flatten : Vector Any flatten self = Vector.flatten self + ## PRIVATE + ADVANCED short_display_text : Integer -> Text short_display_text self max_entries=10 = Vector.short_display_text self max_entries @@ -641,9 +643,15 @@ type Array join : Text -> Text -> Text -> Text join self separator="" prefix="" suffix="" = Vector.join self separator prefix suffix + ## PRIVATE + Generates a human-readable text representation of the array. to_text : Text to_text self = self.map .to_text . join ", " "[" "]" + ## PRIVATE + to_display_text : Text + to_display_text self = self.short_display_text max_entries=40 + ## Combines all the elements of a non-empty array using a binary operation. If the array is empty, it returns `if_empty`. 
From 8c61106a0e5a257a7873ae59d9c37884cdebea44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 17:43:51 +0200 Subject: [PATCH 10/14] fixes --- .../Database/0.0.0-dev/src/Connection/SQLite_Format.enso | 3 ++- .../lib/Standard/Test/0.0.0-dev/src/Extensions.enso | 3 ++- test/Table_Tests/src/Database/SQLite_Spec.enso | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso index 97d0e6bcdc63..42e01f7b34a7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso @@ -14,7 +14,8 @@ type SQLite_Format for_file_read : File -> SQLite_Format | Nothing for_file_read file = expected_header = magic_header_string - got_header = file.read_first_bytes header.length + got_header = file.read_first_bytes expected_header.length + if got_header == expected_header then SQLite_Format.For_File else Nothing ## PRIVATE If the File_Format supports writing to the file, return a configured instance. diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso index 5c7793bbc78f..96638e976f23 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso @@ -284,7 +284,8 @@ Error.should_succeed self frames_to_skip=0 = ## Handles an unexpected dataflow error. 
Error.should_be_a : Integer -> Any -Error.should_be_a self frames_to_skip=0 = +Error.should_be_a self typ frames_to_skip=0 = + _ = typ Test.fail_match_on_unexpected_error self 1+frames_to_skip ## Asserts that the given `Boolean` is `True` diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index f12424b9078a..20c7561ef9ba 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -1,6 +1,7 @@ from Standard.Base import all import Standard.Base.Runtime.Ref.Ref import Standard.Base.Runtime.Context +import Standard.Base.Errors.File_Error.File_Error import Standard.Table.Data.Type.Value_Type.Bits from Standard.Table import Table, Value_Type @@ -177,9 +178,9 @@ spec = Test.specify "should recognise a SQLite database file" <| Auto_Detect.get_reading_format file . should_be_a SQLite_Format - Test.specify "should recognise a sqlite file by extension fro writing" <| - Auto_Detect.get_reading_format (enso_project.data / "nonexistent-data.db") . should_be_a SQLite_Format - Auto_Detect.get_reading_format (enso_project.data / "nonexistent-data.sqlite") . should_be_a SQLite_Format + Test.specify "should recognise a sqlite file by extension for writing" <| + Auto_Detect.get_writing_format (enso_project.data / "nonexistent-data.db") . should_be_a SQLite_Format + Auto_Detect.get_writing_format (enso_project.data / "nonexistent-data.sqlite") . 
should_be_a SQLite_Format Test.specify "should not recognise nonexistent or empty files for reading" <| r1 = Data.read (enso_project.data / "nonexistent-data.db") From 2b9e98104c0a8a9009c7d519fc5528004aa69577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 18:07:18 +0200 Subject: [PATCH 11/14] add a note --- .../Database/0.0.0-dev/src/Connection/SQLite_Format.enso | 1 + 1 file changed, 1 insertion(+) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso index 42e01f7b34a7..3bd0f90864e8 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/SQLite_Format.enso @@ -41,5 +41,6 @@ type SQLite_Format Database.connect (SQLite_Details.SQLite file) ## PRIVATE + Based on the File Format definition at: https://www.sqlite.org/fileformat.html magic_header_string = "SQLite format 3".utf_8 + [0] From 8dfc2100d1faf8214dc4b82cf6f5826762bd5703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 18:10:38 +0200 Subject: [PATCH 12/14] check malformed file --- test/Table_Tests/src/Database/SQLite_Spec.enso | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index 20c7561ef9ba..615f9f4deac0 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -194,6 +194,13 @@ spec = r2.catch . should_be_a File_Error.Unsupported_Type empty.delete_if_exists + broken = enso_project.data / "transient" / "empty-data.db" + "SOME_RANDOM_DATA".write empty on_existing_file=Existing_File_Behavior.Overwrite . should_succeed + r3 = Data.read broken + r3.should_fail_with File_Error + r3.catch . 
should_be_a File_Error.Unsupported_Type + broken.delete_if_exists + Test.specify "should connect to a db file" <| connection = Data.read file tables = connection.tables From feaecc051ddc7871f5f677297d5000be1e552a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 8 May 2023 23:34:16 +0200 Subject: [PATCH 13/14] disable a test due to bug #6609 --- test/Tests/src/Semantic/Java_Interop_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Tests/src/Semantic/Java_Interop_Spec.enso b/test/Tests/src/Semantic/Java_Interop_Spec.enso index 9e65574dac89..a1ac2a913187 100644 --- a/test/Tests/src/Semantic/Java_Interop_Spec.enso +++ b/test/Tests/src/Semantic/Java_Interop_Spec.enso @@ -25,7 +25,7 @@ spec = list = ArrayList.new list.add 432 list.get 0 . should_equal 432 - Test.specify "should report missing method error on Java Arrays" <| + Test.specify "should report missing method error on Java Arrays" pending="Failing due to #6609" <| list = ArrayList.new list.add 432 Test.expect_panic_with (list.asList) No_Such_Method From e6a8bfc7c95309c250e680998c0cdb09a2863109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 9 May 2023 15:22:22 +0200 Subject: [PATCH 14/14] Update test/Tests/src/System/File_Read_Spec.enso Co-authored-by: James Dunkerley --- test/Tests/src/System/File_Read_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Tests/src/System/File_Read_Spec.enso b/test/Tests/src/System/File_Read_Spec.enso index 51b144edd6fa..c294d5df3274 100644 --- a/test/Tests/src/System/File_Read_Spec.enso +++ b/test/Tests/src/System/File_Read_Spec.enso @@ -20,7 +20,7 @@ spec = content = sample_txt.read content.should_equal "Hello World!" 
- Test.specify "should raise an not-found error when reading a nonexistent file even of unknown format" <| + Test.specify "should raise a not-found error when reading a nonexistent file even of unknown format" <| r1 = (File.new "nonexistent.file.of.weird-format").read r1.should_fail_with File_Error r1.catch.should_be_a File_Error.Not_Found