From 2fce9edc6745ae48561efcade11cf0da9de9ca0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 23 Feb 2022 19:49:27 +0100 Subject: [PATCH 1/7] Move to_upper_case and to_lower_case into to_case --- .../Base/0.0.0-dev/src/Data/Text/Case.enso | 10 ++++ .../0.0.0-dev/src/Data/Text/Extensions.enso | 53 +++++-------------- .../Table/0.0.0-dev/src/Data/Table.enso | 2 +- .../0.0.0-dev/src/Scatter_Plot.enso | 2 +- test/Tests/src/Data/Text_Spec.enso | 12 ++--- 5 files changed, 32 insertions(+), 47 deletions(-) create mode 100644 distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case.enso diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case.enso new file mode 100644 index 000000000000..50e6821b3a87 --- /dev/null +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case.enso @@ -0,0 +1,10 @@ +## Specifies the casing options for text conversion. +type Case + ## All letters in lower case. + type Lower + + ## All letters in upper case. + type Upper + + ## First letter of each word in upper case, rest in lower case. 
+ type Title diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 4067572d3f5d..0896cbfaa093 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -5,6 +5,7 @@ from Standard.Builtins import Text, Prim_Text_Helpers import Standard.Base.Data.Text.Regex import Standard.Base.Data.Text.Regex.Mode +import Standard.Base.Data.Text.Case import Standard.Base.Data.Text.Line_Ending_Style import Standard.Base.Data.Text.Split_Kind import Standard.Base.Data.Text.Text_Sub_Range @@ -13,6 +14,7 @@ import Standard.Base.Meta from Standard.Builtins export Text +export Standard.Base.Data.Text.Case export Standard.Base.Data.Text.Split_Kind export Standard.Base.Data.Text.Line_Ending_Style @@ -551,7 +553,7 @@ Text.equals_ignore_case that locale=Locale.default = used to perform case-insensitive comparisons. Text.to_case_insensitive_key : Locale -> Text Text.to_case_insensitive_key locale=Locale.default = - this.to_lower_case locale . to_upper_case locale + this.to_case Case.Lower locale . to_case Case.Upper locale ## Compare two texts to discover their ordering. @@ -984,40 +986,11 @@ Text.drop range = if char_range.end == (Text_Utils.char_length this) then prefix else prefix + Text_Utils.drop_first this char_range.end -## ALIAS Lower Case - - Converts each character in `this` to lower case. - - Arguments: - - locale: specifies the locale for character case mapping. Defaults to the - `Locale.default` locale. - - ! What is a Character? - A character is defined as an Extended Grapheme Cluster, see Unicode - Standard Annex 29. This is the smallest unit that still has semantic - meaning in most text-processing applications. - - > Example - Converting a text to lower case in the default locale: - - "My TeXt!".to_lower_case == "my text!" 
- - > Example - Converting a text to lower case in a specified locale (here, Turkey): - - from Standard.Base import all - import Standard.Base.Data.Locale - - example_case_with_locale = "I".to_lower_case (Locale.new "tr") == "ı" -Text.to_lower_case : Locale.Locale -> Text -Text.to_lower_case locale=Locale.default = - UCharacter.toLowerCase locale.java_locale this - -## ALIAS Upper Case - - Converts each character in `this` to upper case. +## ALIAS lower, upper, title, proper + Converts each character in `this` to the specified case. Arguments: + - case_option: specifies how to convert the characters. - locale: specifies the locale for character case mapping. Defaults to `Locale.default`. @@ -1027,9 +1000,9 @@ Text.to_lower_case locale=Locale.default = meaning in most text-processing applications. > Example - Converting a text to upper case in the default locale: + Converting a text to lower case in the default locale: - "My TeXt!".to_upper_case == "MY TEXT!" + "My TeXt!".to_case == "my text!" 
> Example Converting a text to upper case in a specified locale: @@ -1037,7 +1010,9 @@ Text.to_lower_case locale=Locale.default = from Standard.Base import all import Standard.Base.Data.Locale - example_case_with_locale = "i".to_upper_case (Locale.new "tr") == "İ" -Text.to_upper_case : Locale.Locale -> Text -Text.to_upper_case locale=Locale.default = - UCharacter.toUpperCase locale.java_locale this + example_case_with_locale = "i".to_case Case.Upper (Locale.new "tr") == "İ" +Text.to_case : Case -> Locale -> Text +Text.to_case case_option=Case.Lower locale=Locale.Default = case case_option of + Case.Lower -> UCharacter.toLowerCase locale.java_locale this + Case.Upper -> UCharacter.toUpperCase locale.java_locale this + Case.Title -> Error.throw "TODO" diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index a563f83d3c6d..c3e67e5041fd 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -218,7 +218,7 @@ type Table representation for this table. default_visualization : Visualization.Id.Id default_visualization = - cols = this.columns.map .name . map .to_lower_case + cols = this.columns.map .name . 
map name-> name.to_case Case.Lower if cols.contains "latitude" && cols.contains "longitude" then Visualization.Id.geo_map else if cols.contains "x" && cols.contains "y" then Visualization.Id.scatter_plot else Visualization.Id.table diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso index f62d994ef8b7..a520e681b038 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso @@ -64,7 +64,7 @@ type PointData ## PRIVATE name : Text - name = this.to_text.to_lower_case + name = this.to_text.to_case Case.Lower ## PRIVATE fallback_column : Table -> Column ! No_Fallback_Column diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index e1c0dc0d745a..74ea3dd90802 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -300,12 +300,12 @@ spec = 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜πŸ˜ŽπŸ˜™πŸ˜‰β˜Ί'.drop (Range -3 -1) . should_equal 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜πŸ˜Žβ˜Ί' Test.specify "should correctly convert character case" <| - "FooBar Baz".to_lower_case.should_equal "foobar baz" - "FooBar Baz".to_upper_case.should_equal "FOOBAR BAZ" - "i".to_upper_case . should_equal "I" - "I".to_lower_case . should_equal "i" - "i".to_upper_case (Locale.new "tr") . should_equal "Δ°" - "I".to_lower_case (Locale.new "tr") . should_equal "Δ±" + "FooBar Baz".to_case Case.Lower . should_equal "foobar baz" + "FooBar Baz".to_case Case.Upper . should_equal "FOOBAR BAZ" + "i".to_case Case.Upper . should_equal "I" + "I".to_case Case.Lower . should_equal "i" + "i".to_case Case.Upper (Locale.new "tr") . should_equal "Δ°" + "I".to_case Case.Lower (Locale.new "tr") . 
should_equal "Δ±" Test.specify "should dump utf-16 characters to a vector" <| kshi_chars = kshi.utf_16 From 04edb3e5857eef6dc12306c9b7b2ee7b75cf71db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 23 Feb 2022 19:56:17 +0100 Subject: [PATCH 2/7] Add an export, not sure about it --- distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso index 459a28a04f71..18ce9577fe8e 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso @@ -47,7 +47,8 @@ from project.Data.Number.Extensions export all hiding Math, String, Double from project.Data.Noise export all hiding Noise from project.Data.Pair export Pair from project.Data.Range export Range -from project.Data.Text.Extensions export Text, Split_Kind, Line_Ending_Style +# TODO [RW] do we want to export case? I don't want to pollute too much, but without autoscoping to_case is unusable as you'd need to import it. 
+from project.Data.Text.Extensions export Text, Split_Kind, Line_Ending_Style, Case from project.Data.Text.Matching export Case_Insensitive, Text_Matcher, Regex_Matcher from project.Error.Common export all from project.Error.Extensions export all From 5774a88bebac3e04a733375118ddff5fae3d9623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 24 Feb 2022 21:18:22 +0100 Subject: [PATCH 3/7] Implement title case TODO: some more tests would be good --- .../lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 2 +- test/Tests/src/Data/Text_Spec.enso | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 0896cbfaa093..5480ec0d616c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -1015,4 +1015,4 @@ Text.to_case : Case -> Locale -> Text Text.to_case case_option=Case.Lower locale=Locale.Default = case case_option of Case.Lower -> UCharacter.toLowerCase locale.java_locale this Case.Upper -> UCharacter.toUpperCase locale.java_locale this - Case.Title -> Error.throw "TODO" + Case.Title -> UCharacter.toTitleCase locale.java_locale this Nothing diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 74ea3dd90802..4ba8240abc2d 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -306,6 +306,7 @@ spec = "I".to_case Case.Lower . should_equal "i" "i".to_case Case.Upper (Locale.new "tr") . should_equal "Δ°" "I".to_case Case.Lower (Locale.new "tr") . should_equal "Δ±" + "foo bar baz".to_case Case.Title . 
should_equal "Foo Bar Baz" Test.specify "should dump utf-16 characters to a vector" <| kshi_chars = kshi.utf_16 From 3740263d27c589c855d8a4802320c2aae1fe52d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 28 Feb 2022 13:08:43 +0100 Subject: [PATCH 4/7] Add more tests --- test/Tests/src/Data/Text_Spec.enso | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 4ba8240abc2d..c330dc903c05 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -302,11 +302,31 @@ spec = Test.specify "should correctly convert character case" <| "FooBar Baz".to_case Case.Lower . should_equal "foobar baz" "FooBar Baz".to_case Case.Upper . should_equal "FOOBAR BAZ" + + "foo bar baz".to_case Case.Title . should_equal "Foo Bar Baz" + "foo-bar, baz.baz foo_foo".to_case Case.Title . should_equal "Foo-Bar, Baz.baz Foo_foo" + "i".to_case Case.Upper . should_equal "I" "I".to_case Case.Lower . should_equal "i" "i".to_case Case.Upper (Locale.new "tr") . should_equal "Δ°" "I".to_case Case.Lower (Locale.new "tr") . should_equal "Δ±" - "foo bar baz".to_case Case.Title . should_equal "Foo Bar Baz" + "Δ°".to_case Case.Lower . should_equal "iΜ‡" + "Δ±".to_case Case.Upper . should_equal "I" + + "Straße".to_case Case.Upper . should_equal "STRASSE" + "STRASSE".to_case Case.Lower . should_equal "strasse" + "et cΓ¦tera".to_case Case.Upper . should_equal "ET CΓ†TERA" + ("Ξ²".to_case Case.Upper == "B") . should_be_false + "δλφξ".to_case Case.Upper . should_equal "Ξ”Ξ›Ξ¦Ξž" + "Ξ”Ξ›Ξ¦Ξž".to_case Case.Lower . should_equal "δλφξ" + "δλ φξ".to_case Case.Title . should_equal "Δλ Φξ" + + 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί'.to_case Case.Upper . should_equal 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί' + 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί'.to_case Case.Lower . 
should_equal 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί' + 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί'.to_case Case.Title . should_equal 'βœ¨πŸš€πŸš§πŸ˜πŸ˜ƒπŸ˜ŽπŸ˜™πŸ˜‰β˜Ί' + + "123".to_case Case.Upper . should_equal "123" + "abc123".to_case Case.Upper . should_equal "ABC123" Test.specify "should dump utf-16 characters to a vector" <| kshi_chars = kshi.utf_16 From bca90deb212a8a7cc7e208b761d5306c2cd020f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 28 Feb 2022 14:45:00 +0100 Subject: [PATCH 5/7] explain title case --- .../Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 6 ++++++ test/Tests/src/Data/Text_Spec.enso | 1 + 2 files changed, 7 insertions(+) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 5480ec0d616c..6dede427d0f6 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -999,6 +999,12 @@ Text.drop range = Standard Annex 29. This is the smallest unit that still has semantic meaning in most text-processing applications. + ! What is title case? + Title case capitalizes the first letter of every word and ensures that all + the remaining letters are in lower case. Some definitions of title case + avoid capitalizing minor words (like the article "the" in English) but this + implementation treats all words in the same way. + > Example Converting a text to lower case in the default locale: diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index c330dc903c05..4172669e4820 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -305,6 +305,7 @@ spec = "foo bar baz".to_case Case.Title . should_equal "Foo Bar Baz" "foo-bar, baz.baz foo_foo".to_case Case.Title . 
should_equal "Foo-Bar, Baz.baz Foo_foo" + "jAck the rippER".to_case Case.Title (Locale.uk) . should_equal "Jack The Ripper" "i".to_case Case.Upper . should_equal "I" "I".to_case Case.Lower . should_equal "i" From ae9a3e679f80dc766e056b4bf4f6f7b41edc1695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 28 Feb 2022 16:05:21 +0100 Subject: [PATCH 6/7] fix todo --- distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso index 18ce9577fe8e..702831bd47b3 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso @@ -47,7 +47,13 @@ from project.Data.Number.Extensions export all hiding Math, String, Double from project.Data.Noise export all hiding Noise from project.Data.Pair export Pair from project.Data.Range export Range -# TODO [RW] do we want to export case? I don't want to pollute too much, but without autoscoping to_case is unusable as you'd need to import it. +## TODO [RW] Once autoscoping is implemented or automatic imports for ADTs are + fixed in the IDE, we should revisit if we want to export ADTs like `Case` by + default. It may be unnecessary pollution of scope, but until the issues are + fixed, common standard library functions are almost unusable in the GUI. 
+ Relevant issues: + https://www.pivotaltracker.com/story/show/181403340 + https://www.pivotaltracker.com/story/show/181309938 from project.Data.Text.Extensions export Text, Split_Kind, Line_Ending_Style, Case from project.Data.Text.Matching export Case_Insensitive, Text_Matcher, Regex_Matcher from project.Error.Common export all From 2046d6e53548ebac82a7bbcd03450c85d03ea61b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 28 Feb 2022 17:03:24 +0100 Subject: [PATCH 7/7] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e82a8d742733..a8629ad56b3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,8 @@ functions][3287] - [Implemented new `Text.starts_with` and `Text.ends_with` functions, replacing existing functions][3292] +- [Implemented `Text.to_case`, replacing `Text.to_lower_case` and + `Text.to_upper_case`][3302] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -85,6 +87,7 @@ [3285]: https://github.com/enso-org/enso/pull/3285 [3287]: https://github.com/enso-org/enso/pull/3287 [3292]: https://github.com/enso-org/enso/pull/3292 +[3302]: https://github.com/enso-org/enso/pull/3302 #### Enso Compiler