From ee7019b5bd8520ca06c0f04b8d43cd26ce955dcd Mon Sep 17 00:00:00 2001 From: austinletson Date: Fri, 26 Apr 2024 19:42:39 -0400 Subject: [PATCH 01/18] doc: add docstrings and usage examples in `Init.Data.String.Basic` Add docstrings and usage examples for `String.length`, `.push`, `.append`, `.get?`, `.set`, `.modyify`, and `.next`. Update docstrings and add usage examples for `String.toList`, `.get`, and `.get!`. --- src/Init/Data/String/Basic.lean | 70 ++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 1dbe48e753a1..76a947790c40 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -24,23 +24,41 @@ instance : LT String := instance decLt (s₁ s₂ : @& String) : Decidable (s₁ < s₂) := List.hasDecidableLt s₁.data s₂.data +/-- +Returns the length of a string. +* `"".length = 0` +* `"abc".length = 3` +-/ @[extern "lean_string_length"] def length : (@& String) → Nat | ⟨s⟩ => s.length -/-- The internal implementation uses dynamic arrays and will perform destructive updates - if the String is not shared. -/ +/-- +Pushes a character onto the end of a string. + +The internal implementation uses dynamic arrays and will perform destructive updates +if the String is not shared. +* `"abc".push 'd' = "abcd"` +-/ @[extern "lean_string_push"] def push : String → Char → String | ⟨s⟩, c => ⟨s ++ [c]⟩ -/-- The internal implementation uses dynamic arrays and will perform destructive updates - if the String is not shared. -/ +/-- +Appends two strings. + +The internal implementation uses dynamic arrays and will perform destructive updates +if the String is not shared. +* `"abc".append "def" = "abcdef"` +-/ @[extern "lean_string_append"] def append : String → (@& String) → String | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩ -/-- O(n) in the runtime, where n is the length of the String -/ +/-- +`O(s.length)`. Converts a string to a list of characters. +* `"abc".toList = ['a', 'b', 'c']` +-/ def toList (s : String) : List Char := s.data @@ -59,9 +77,13 @@ def utf8GetAux : List Char → Pos → Pos → Char | c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p /-- - Return character at position `p`. If `p` is not a valid position - returns `(default : Char)`. - See `utf8GetAux` for the reference implementation. +Returns the character at position `p` of a string. + +If `p` is not a valid position, returns `(default : Char)`. + +See `utf8GetAux` for the reference implementation. +* `"abc".get ⟨1⟩ = 'b'` +* `"abc".get ⟨3⟩ = (default : Char) = 'A'` -/ @[extern "lean_string_utf8_get"] def get (s : @& String) (p : @& Pos) : Char := @@ -72,12 +94,21 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char | [], _, _ => none | c::cs, i, p => if i = p then c else utf8GetAux? cs (i + c) p +/-- +Returns the character at position `p`. + +If `p` is not a valid position, returns `none`. +* `"abc".get ⟨1⟩ = some 'b'` +* `"abc".get ⟨3⟩ = none` +-/ @[extern "lean_string_utf8_get_opt"] def get? : (@& String) → (@& Pos) → Option Char | ⟨s⟩, p => utf8GetAux? s 0 p /-- - Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`. +Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`. +* `"abc".get! ⟨1⟩ = 'b'` +* `"abc".get! ⟨3⟩ => panic!` -/ @[extern "lean_string_utf8_get_bang"] def get! (s : @& String) (p : @& Pos) : Char := @@ -89,13 +120,34 @@ def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char | c::cs, i, p => if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p) +/-- +Replaces the character at a specified position in a string with a new character. + +If the position is invalid, returns the string unchanged. +* `"abc".set ⟨1⟩ 'B' = "aBc"` +* `"abc".set ⟨3⟩ 'D' = "abc"` +-/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String | ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩ +/-- +Applies `f` to the character at position `p` in the string `s`. + +If `p` is an invalid position, returns the string unchanged. +* `abc.modify ⟨1⟩ Char.toUpper = "aBc"` +* `abc.modify ⟨3⟩ Char.toUpper = "abc"` +-/ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i +/-- +Returns the next position in a string after `p`. + +Note `p.next` is not necessarily a valid position in the string `s`. +* `"abc".next ⟨1⟩ = String.Pos.mk 2` +* `"abc".next ⟨3⟩ = String.Pos.mk 4` +-/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := let c := get s p From ac7eabb0ca0d422c869fe669ddb06bc1e56cab57 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Sun, 28 Apr 2024 07:45:10 -0400 Subject: [PATCH 02/18] format: lower case String Co-authored-by: Joachim Breitner --- src/Init/Data/String/Basic.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 76a947790c40..5038c11de5d0 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -37,7 +37,7 @@ def length : (@& String) → Nat Pushes a character onto the end of a string. The internal implementation uses dynamic arrays and will perform destructive updates -if the String is not shared. +if the string is not shared. * `"abc".push 'd' = "abcd"` -/ @[extern "lean_string_push"] From 260d01a7d25cc2d9f1cc331ea9c66986aef29d12 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Sun, 28 Apr 2024 07:46:02 -0400 Subject: [PATCH 03/18] doc: add warning about destructive updates to `String.set` docs Co-authored-by: Joachim Breitner --- src/Init/Data/String/Basic.lean | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 5038c11de5d0..fd95d0757123 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -126,6 +126,8 @@ Replaces the character at a specified position in a string with a new character. If the position is invalid, returns the string unchanged. * `"abc".set ⟨1⟩ 'B' = "aBc"` * `"abc".set ⟨3⟩ 'D' = "abc"` + +If both the replacement character and the replaced character are ASCII characters and the string is not shared, destructive updates are used. -/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String From 6165eb7aaef0f73fcfb0a1c79bc3867c5e171e7e Mon Sep 17 00:00:00 2001 From: austinletson Date: Sun, 28 Apr 2024 08:27:52 -0400 Subject: [PATCH 04/18] doc: add usage examples for multi-byte chars --- src/Init/Data/String/Basic.lean | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index fd95d0757123..79f59f69177a 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -84,6 +84,7 @@ If `p` is not a valid position, returns `(default : Char)`. See `utf8GetAux` for the reference implementation. * `"abc".get ⟨1⟩ = 'b'` * `"abc".get ⟨3⟩ = (default : Char) = 'A'` +* `"L∃∀N".get ⟨2⟩ = (default : Char) = 'A' -- invalid UTF-8 position as '∃' is a mutli-byte character` -/ @[extern "lean_string_utf8_get"] def get (s : @& String) (p : @& Pos) : Char := @@ -98,8 +99,9 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char Returns the character at position `p`. If `p` is not a valid position, returns `none`. -* `"abc".get ⟨1⟩ = some 'b'` -* `"abc".get ⟨3⟩ = none` +* `"abc".get? ⟨1⟩ = some 'b'` +* `"abc".get? ⟨3⟩ = none` +* `"L∃∀N".get? ⟨2⟩ = none -- invalid UTF-8 position as '∃' is a mutli-byte character` -/ @[extern "lean_string_utf8_get_opt"] def get? : (@& String) → (@& Pos) → Option Char @@ -109,6 +111,7 @@ def get? : (@& String) → (@& Pos) → Option Char Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`. * `"abc".get! ⟨1⟩ = 'b'` * `"abc".get! ⟨3⟩ => panic!` +* `"L∃∀N".get! ⟨2⟩ => panic! -- invalid UTF-8 position as '∃' is a mutli-byte character` -/ @[extern "lean_string_utf8_get_bang"] def get! (s : @& String) (p : @& Pos) : Char := @@ -126,6 +129,7 @@ Replaces the character at a specified position in a string with a new character. If the position is invalid, returns the string unchanged. * `"abc".set ⟨1⟩ 'B' = "aBc"` * `"abc".set ⟨3⟩ 'D' = "abc"` +* `"L∃∀N".set ⟨1⟩ 'X' = "L∃∀N" -- invalid position as '∃' is a mutli-byte character` If both the replacement character and the replaced character are ASCII characters and the string is not shared, destructive updates are used. -/ @@ -146,9 +150,10 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := /-- Returns the next position in a string after `p`. -Note `p.next` is not necessarily a valid position in the string `s`. +If `p` is an invalid position or `p = s.endPos`, `p.next` could return an invalid position * `"abc".next ⟨1⟩ = String.Pos.mk 2` * `"abc".next ⟨3⟩ = String.Pos.mk 4` +* `"L∃∀N".next ⟨1⟩ => String.Pos.mk 4 -- '∃' is a mutli-byte character` -/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := From f77bead1c40766a9e0f7377296480c009a7d279c Mon Sep 17 00:00:00 2001 From: austinletson Date: Sun, 28 Apr 2024 08:39:17 -0400 Subject: [PATCH 05/18] doc: updated `next` to include refernce to valid position --- src/Init/Data/String/Basic.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 79f59f69177a..49da4d79c60f 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -148,7 +148,7 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after `p`. +Returns the next position in a string after `p` given that `p` is a valid position. If `p` is an invalid position or `p = s.endPos`, `p.next` could return an invalid position * `"abc".next ⟨1⟩ = String.Pos.mk 2` From c9f4750e3888725224e89a280e8acc06124cf44a Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Sun, 28 Apr 2024 12:15:33 -0400 Subject: [PATCH 06/18] docs: `next` wordsmithing Co-authored-by: Joachim Breitner --- src/Init/Data/String/Basic.lean | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 49da4d79c60f..f12ab9b3cf0c 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -148,7 +148,9 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after `p` given that `p` is a valid position. +Returns the next position in a string after position `p`. + +If `p` is not a valid position, the result is unspecified. If `p` is an invalid position or `p = s.endPos`, `p.next` could return an invalid position * `"abc".next ⟨1⟩ = String.Pos.mk 2` From 808ab0d9dcc8e9a6c9228d1e148997e346f692ba Mon Sep 17 00:00:00 2001 From: austinletson Date: Sun, 28 Apr 2024 12:18:15 -0400 Subject: [PATCH 07/18] doc: added case where `p = s.endPos` to `next` docstring --- src/Init/Data/String/Basic.lean | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index f12ab9b3cf0c..2d218bae63a7 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -150,9 +150,7 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := /-- Returns the next position in a string after position `p`. -If `p` is not a valid position, the result is unspecified. - -If `p` is an invalid position or `p = s.endPos`, `p.next` could return an invalid position +If `p` is not a valid position or `p = s.endPos`, the result is unspecified. * `"abc".next ⟨1⟩ = String.Pos.mk 2` * `"abc".next ⟨3⟩ = String.Pos.mk 4` * `"L∃∀N".next ⟨1⟩ => String.Pos.mk 4 -- '∃' is a mutli-byte character` From 96b2c645731c9c9f4724eb6543d60abf9f6c388a Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Mon, 6 May 2024 08:03:55 -0400 Subject: [PATCH 08/18] doc: add note of Unicode code points to String.length Co-authored-by: David Thrane Christiansen --- src/Init/Data/String/Basic.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 2d218bae63a7..dad19cbcc729 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -25,7 +25,7 @@ instance decLt (s₁ s₂ : @& String) : Decidable (s₁ < s₂) := List.hasDecidableLt s₁.data s₂.data /-- -Returns the length of a string. +Returns the length of a string in Unicode code points. * `"".length = 0` * `"abc".length = 3` -/ From cec02c35fc91fb6908480c8fc0cd2cc6869eb666 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Mon, 6 May 2024 08:46:46 -0400 Subject: [PATCH 09/18] doc: batch apply David's suggestions Co-authored-by: David Thrane Christiansen --- src/Init/Data/String/Basic.lean | 60 +++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index dad19cbcc729..34d746763888 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -26,6 +26,8 @@ instance decLt (s₁ s₂ : @& String) : Decidable (s₁ < s₂) := /-- Returns the length of a string in Unicode code points. + +Examples: * `"".length = 0` * `"abc".length = 3` -/ @@ -38,7 +40,8 @@ Pushes a character onto the end of a string. The internal implementation uses dynamic arrays and will perform destructive updates if the string is not shared. -* `"abc".push 'd' = "abcd"` + +Example: `"abc".push 'd' = "abcd"` -/ @[extern "lean_string_push"] def push : String → Char → String @@ -48,16 +51,20 @@ def push : String → Char → String Appends two strings. The internal implementation uses dynamic arrays and will perform destructive updates -if the String is not shared. -* `"abc".append "def" = "abcdef"` +if the string is not shared. + +Example: `"abc".append "def" = "abcdef"` -/ @[extern "lean_string_append"] def append : String → (@& String) → String | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩ /-- -`O(s.length)`. Converts a string to a list of characters. -* `"abc".toList = ['a', 'b', 'c']` + Converts a string to a list of characters. + + Even though the logical model of strings is as a structure that wraps a list of characters, this operation takes time and space linear in the length of the string, because the compiler uses an optimized representation as dynamic arrays. + +Example: `"abc".toList = ['a', 'b', 'c']` -/ def toList (s : String) : List Char := s.data @@ -77,14 +84,15 @@ def utf8GetAux : List Char → Pos → Pos → Char | c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p /-- -Returns the character at position `p` of a string. - -If `p` is not a valid position, returns `(default : Char)`. +Returns the character at position `p` of a string. If `p` is not a valid position, returns `(default : Char)`. See `utf8GetAux` for the reference implementation. + +Examples: * `"abc".get ⟨1⟩ = 'b'` * `"abc".get ⟨3⟩ = (default : Char) = 'A'` -* `"L∃∀N".get ⟨2⟩ = (default : Char) = 'A' -- invalid UTF-8 position as '∃' is a mutli-byte character` + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example,`"L∃∀N".get ⟨2⟩ = (default : Char) = 'A'`. -/ @[extern "lean_string_utf8_get"] def get (s : @& String) (p : @& Pos) : Char := @@ -99,19 +107,26 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char Returns the character at position `p`. If `p` is not a valid position, returns `none`. + +Examples: * `"abc".get? ⟨1⟩ = some 'b'` * `"abc".get? ⟨3⟩ = none` -* `"L∃∀N".get? ⟨2⟩ = none -- invalid UTF-8 position as '∃' is a mutli-byte character` + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example, `"L∃∀N".get? ⟨2⟩ = none` -/ @[extern "lean_string_utf8_get_opt"] def get? : (@& String) → (@& Pos) → Option Char | ⟨s⟩, p => utf8GetAux? s 0 p /-- -Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`. +Returns the character at position `p` of a string. If `p` is not a valid position, returns `(default : Char)` and produces a panic error message. + +Examples: * `"abc".get! ⟨1⟩ = 'b'` -* `"abc".get! ⟨3⟩ => panic!` -* `"L∃∀N".get! ⟨2⟩ => panic! -- invalid UTF-8 position as '∃' is a mutli-byte character` +* `"abc".get! ⟨3⟩` panics + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example, +`"L∃∀N".get! ⟨2⟩` panics. -/ @[extern "lean_string_utf8_get_bang"] def get! (s : @& String) (p : @& Pos) : Char := @@ -124,23 +139,26 @@ def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p) /-- -Replaces the character at a specified position in a string with a new character. +Replaces the character at a specified position in a string with a new character. If the position is invalid, the string is returned unchanged. + +If both the replacement character and the replaced character are ASCII characters and the string is not shared, destructive updates are used. -If the position is invalid, returns the string unchanged. +Examples: * `"abc".set ⟨1⟩ 'B' = "aBc"` * `"abc".set ⟨3⟩ 'D' = "abc"` -* `"L∃∀N".set ⟨1⟩ 'X' = "L∃∀N" -- invalid position as '∃' is a mutli-byte character` +* `"L∃∀N".set ⟨1⟩ 'X' = "LX∀N"` + +Because `'∃'` is a multi-byte character, the byte index `2` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. -If both the replacement character and the replaced character are ASCII characters and the string is not shared, destructive updates are used. -/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String | ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩ /-- -Applies `f` to the character at position `p` in the string `s`. +Replaces the character at position `p` in the string `s` with the result of applying `f` to that character. If `p` is an invalid position, the string is returned unchanged. -If `p` is an invalid position, returns the string unchanged. +Examples: * `abc.modify ⟨1⟩ Char.toUpper = "aBc"` * `abc.modify ⟨3⟩ Char.toUpper = "abc"` -/ @@ -148,9 +166,7 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after position `p`. - -If `p` is not a valid position or `p = s.endPos`, the result is unspecified. +Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, the result is unspecified. * `"abc".next ⟨1⟩ = String.Pos.mk 2` * `"abc".next ⟨3⟩ = String.Pos.mk 4` * `"L∃∀N".next ⟨1⟩ => String.Pos.mk 4 -- '∃' is a mutli-byte character` From dcd77bff83c3026c2cc2447162f9f43675ad17cd Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 08:50:52 -0400 Subject: [PATCH 10/18] doc: add example to String.length which differentiates between code points and bytes --- src/Init/Data/String/Basic.lean | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 34d746763888..9112f487c3d9 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -30,6 +30,7 @@ Returns the length of a string in Unicode code points. Examples: * `"".length = 0` * `"abc".length = 3` +* `"L∃∀N".length = 4` -/ @[extern "lean_string_length"] def length : (@& String) → Nat @@ -61,7 +62,7 @@ def append : String → (@& String) → String /-- Converts a string to a list of characters. - + Even though the logical model of strings is as a structure that wraps a list of characters, this operation takes time and space linear in the length of the string, because the compiler uses an optimized representation as dynamic arrays. Example: `"abc".toList = ['a', 'b', 'c']` From f241d83a0aff0e59e8b0f4222f8951730eb7eab7 Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 12:13:15 -0400 Subject: [PATCH 11/18] doc: add context to examples for String.next --- src/Init/Data/String/Basic.lean | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 9112f487c3d9..048bf0ffe77b 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -167,10 +167,18 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, the result is unspecified. -* `"abc".next ⟨1⟩ = String.Pos.mk 2` -* `"abc".next ⟨3⟩ = String.Pos.mk 4` -* `"L∃∀N".next ⟨1⟩ => String.Pos.mk 4 -- '∃' is a mutli-byte character` +Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos - 1`, the result is unspecified. + +Examples: +``` +"abc".next ⟨1⟩ = String.Pos.mk 2 + +-- '∃' is a mutli-byte character +"L∃∀N".next ⟨1⟩ = String.Pos.mk 4 + +-- Since `2 = s.endPos - 1`, the result is an invalid postion +"abc".next ⟨2⟩ = String.Pos.mk 3 +``` -/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := From 47bbfcc85769d353c7fa8fdcd916ff62188a0174 Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 12:46:56 -0400 Subject: [PATCH 12/18] doc: improve examples --- src/Init/Data/String/Basic.lean | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 048bf0ffe77b..710bef93bba7 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -61,9 +61,9 @@ def append : String → (@& String) → String | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩ /-- - Converts a string to a list of characters. +Converts a string to a list of characters. - Even though the logical model of strings is as a structure that wraps a list of characters, this operation takes time and space linear in the length of the string, because the compiler uses an optimized representation as dynamic arrays. +Even though the logical model of strings is as a structure that wraps a list of characters, this operation takes time and space linear in the length of the string, because the compiler uses an optimized representation as dynamic arrays. Example: `"abc".toList = ['a', 'b', 'c']` -/ @@ -147,7 +147,7 @@ If both the replacement character and the replaced character are ASCII character Examples: * `"abc".set ⟨1⟩ 'B' = "aBc"` * `"abc".set ⟨3⟩ 'D' = "abc"` -* `"L∃∀N".set ⟨1⟩ 'X' = "LX∀N"` +* `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"` Because `'∃'` is a multi-byte character, the byte index `2` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. @@ -167,7 +167,7 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos - 1`, the result is unspecified. +Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, the result is unspecified. Examples: ``` @@ -175,9 +175,6 @@ Examples: -- '∃' is a mutli-byte character "L∃∀N".next ⟨1⟩ = String.Pos.mk 4 - --- Since `2 = s.endPos - 1`, the result is an invalid postion -"abc".next ⟨2⟩ = String.Pos.mk 3 ``` -/ @[extern "lean_string_utf8_next"] From 868a1ce88d2a73dd9a99a97ac2ca30cb1f927c78 Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 18:41:49 -0400 Subject: [PATCH 13/18] doc: improve String.next example context --- src/Init/Data/String/Basic.lean | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 710bef93bba7..6a85d34ab623 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -105,9 +105,7 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char | c::cs, i, p => if i = p then c else utf8GetAux? cs (i + c) p /-- -Returns the character at position `p`. - -If `p` is not a valid position, returns `none`. +Returns the character at position `p`. If `p` is not a valid position, returns `none`. Examples: * `"abc".get? ⟨1⟩ = some 'b'` @@ -149,7 +147,7 @@ Examples: * `"abc".set ⟨3⟩ 'D' = "abc"` * `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"` -Because `'∃'` is a multi-byte character, the byte index `2` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. +Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. -/ @[extern "lean_string_utf8_set"] @@ -170,12 +168,16 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, the result is unspecified. Examples: -``` -"abc".next ⟨1⟩ = String.Pos.mk 2 +* `"abc".next ⟨1⟩ = String.Pos.mk 2` +* `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4` +`'∃'` is a mutli-byte character + +Cases where the result is unspecified: +* `"abc".next ⟨3⟩ = String.Pos.mk 4` +Since `3 = s.endPos`, the result is an invalid position +* `"L∃∀N".next ⟨2⟩ = String.Pos.mk 3` +Since `2` points into the middle of a multi-byte UTF-8 character the result is an invalid position --- '∃' is a mutli-byte character -"L∃∀N".next ⟨1⟩ = String.Pos.mk 4 -``` -/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := From 72d3d4eeba2701fa3d5391c1e0006078ed733bd9 Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 18:44:20 -0400 Subject: [PATCH 14/18] doc: remove white space --- src/Init/Data/String/Basic.lean | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 6a85d34ab623..c3a5c3ebe284 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -148,7 +148,6 @@ Examples: * `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"` Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. - -/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String @@ -177,7 +176,6 @@ Cases where the result is unspecified: Since `3 = s.endPos`, the result is an invalid position * `"L∃∀N".next ⟨2⟩ = String.Pos.mk 3` Since `2` points into the middle of a multi-byte UTF-8 character the result is an invalid position - -/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := From 1c144763d64e4028d0202cb5510d2061f99cbb87 Mon Sep 17 00:00:00 2001 From: austinletson Date: Mon, 6 May 2024 19:30:17 -0400 Subject: [PATCH 15/18] doc: add newlines to lines longer than 100 chars --- src/Init/Data/String/Basic.lean | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index c3a5c3ebe284..cd368215bdcd 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -63,7 +63,9 @@ def append : String → (@& String) → String /-- Converts a string to a list of characters. -Even though the logical model of strings is as a structure that wraps a list of characters, this operation takes time and space linear in the length of the string, because the compiler uses an optimized representation as dynamic arrays. +Even though the logical model of strings is as a structure that wraps a list of characters, +this operation takes time and space linear in the length of the string, because the compiler +uses an optimized representation as dynamic arrays. Example: `"abc".toList = ['a', 'b', 'c']` -/ @@ -85,7 +87,8 @@ def utf8GetAux : List Char → Pos → Pos → Char | c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p /-- -Returns the character at position `p` of a string. If `p` is not a valid position, returns `(default : Char)`. +Returns the character at position `p` of a string. If `p` is not a valid position, +returns `(default : Char)`. See `utf8GetAux` for the reference implementation. @@ -93,7 +96,8 @@ Examples: * `"abc".get ⟨1⟩ = 'b'` * `"abc".get ⟨3⟩ = (default : Char) = 'A'` -Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example,`"L∃∀N".get ⟨2⟩ = (default : Char) = 'A'`. +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 +character. For example,`"L∃∀N".get ⟨2⟩ = (default : Char) = 'A'`. -/ @[extern "lean_string_utf8_get"] def get (s : @& String) (p : @& Pos) : Char := @@ -111,14 +115,16 @@ Examples: * `"abc".get? ⟨1⟩ = some 'b'` * `"abc".get? ⟨3⟩ = none` -Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example, `"L∃∀N".get? ⟨2⟩ = none` +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 +character. For example, `"L∃∀N".get? ⟨2⟩ = none` -/ @[extern "lean_string_utf8_get_opt"] def get? : (@& String) → (@& Pos) → Option Char | ⟨s⟩, p => utf8GetAux? s 0 p /-- -Returns the character at position `p` of a string. If `p` is not a valid position, returns `(default : Char)` and produces a panic error message. +Returns the character at position `p` of a string. If `p` is not a valid position, +returns `(default : Char)` and produces a panic error message. Examples: * `"abc".get! ⟨1⟩ = 'b'` @@ -138,23 +144,27 @@ def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p) /-- -Replaces the character at a specified position in a string with a new character. If the position is invalid, the string is returned unchanged. +Replaces the character at a specified position in a string with a new character. If the position +is invalid, the string is returned unchanged. -If both the replacement character and the replaced character are ASCII characters and the string is not shared, destructive updates are used. +If both the replacement character and the replaced character are ASCII characters and the string +is not shared, destructive updates are used. Examples: * `"abc".set ⟨1⟩ 'B' = "aBc"` * `"abc".set ⟨3⟩ 'D' = "abc"` * `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"` -Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position, so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. +Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position, +so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. -/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String | ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩ /-- -Replaces the character at position `p` in the string `s` with the result of applying `f` to that character. If `p` is an invalid position, the string is returned unchanged. +Replaces the character at position `p` in the string `s` with the result of applying `f` to that character. +If `p` is an invalid position, the string is returned unchanged. Examples: * `abc.modify ⟨1⟩ Char.toUpper = "aBc"` @@ -164,7 +174,8 @@ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i /-- -Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, the result is unspecified. +Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, +the result is unspecified. Examples: * `"abc".next ⟨1⟩ = String.Pos.mk 2` From 48c0723b1ae513bcc68253f6a85a8e4765626391 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Tue, 7 May 2024 08:17:04 -0400 Subject: [PATCH 16/18] doc: improve unspecified examples for String.next Co-authored-by: David Thrane Christiansen --- src/Init/Data/String/Basic.lean | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index cd368215bdcd..0700a12cbb1b 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -183,10 +183,8 @@ Examples: `'∃'` is a mutli-byte character Cases where the result is unspecified: -* `"abc".next ⟨3⟩ = String.Pos.mk 4` -Since `3 = s.endPos`, the result is an invalid position -* `"L∃∀N".next ⟨2⟩ = String.Pos.mk 3` -Since `2` points into the middle of a multi-byte UTF-8 character the result is an invalid position +* `"abc".next ⟨3⟩`, since `3 = s.endPos` +* `"L∃∀N".next ⟨2⟩`, since `2` points into the middle of a multi-byte UTF-8 character -/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := From 1f0f5ebd05b00c02fb868eb94e96e7778513f108 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Tue, 7 May 2024 08:20:19 -0400 Subject: [PATCH 17/18] doc: fix typo Co-authored-by: David Thrane Christiansen --- src/Init/Data/String/Basic.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 0700a12cbb1b..01ee887e388e 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -180,7 +180,7 @@ the result is unspecified. Examples: * `"abc".next ⟨1⟩ = String.Pos.mk 2` * `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4` -`'∃'` is a mutli-byte character +`'∃'` is a multi-byte character Cases where the result is unspecified: * `"abc".next ⟨3⟩`, since `3 = s.endPos` From 198090adc18031d3edfeaccf77d97e0ac37f376e Mon Sep 17 00:00:00 2001 From: austinletson Date: Tue, 7 May 2024 08:33:13 -0400 Subject: [PATCH 18/18] doc: align String.next examples --- src/Init/Data/String/Basic.lean | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 01ee887e388e..1ea50f2e6869 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -179,8 +179,7 @@ the result is unspecified. Examples: * `"abc".next ⟨1⟩ = String.Pos.mk 2` -* `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4` -`'∃'` is a multi-byte character +* `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4`, since `'∃'` is a multi-byte UTF-8 character Cases where the result is unspecified: * `"abc".next ⟨3⟩`, since `3 = s.endPos`