Skip to content

Commit

Permalink
Improved Natural Order
Browse files Browse the repository at this point in the history
Data generator for benchmarking
  • Loading branch information
jdunkerley committed Feb 14, 2022
1 parent 16a7ec7 commit 61b18fd
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ type Generator
range has an equal chance of occurring.
type Deterministic_Random

## A determinstic random noise generator that performs a peterbation of the
## A deterministic random noise generator that performs a perturbation of the
input

It produices what is commonly termed as "white" noise, where any value in
It produces what is commonly termed as "white" noise, where any value in
the range has an equal chance of occurring.
type Deterministic_Random

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import Standard.Base.Data.Text.Regex
import Standard.Base.Data.Text.Regex.Mode
import Standard.Base.Data.Ordering.Vector_Lexicographic_Order

polyglot java import org.enso.base.Text_Utils
polyglot java import com.ibm.icu.text.BreakIterator

## Compares two text values according to the natural dictionary ordering.

> Example
Expand All @@ -17,62 +20,55 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"]
compare : Text -> Text -> Ordering
compare text1 text2 =
# NOTE(review): the lines from here down to `res` look like the pre-commit
# body shown on the removed side of this diff hunk — confirm against the
# repository before relying on them.
## TODO [RW] A more efficient algorithm which works better for longer texts
should be implemented, as described in
https://www.pivotaltracker.com/story/show/181176589
# Turn each text into its natural key (a vector built by `to_natural_key`).
nat1 = here.to_natural_key text1
nat2 = here.to_natural_key text2
## TODO [RW] The additional assignment to a temporary variable is a
workaround for the following bug:
https://www.pivotaltracker.com/story/show/181162108
# The result of comparing the two keys is the result of the whole comparison.
res = Vector_Lexicographic_Order.compare nat1 nat2
res

## PRIVATE
to_natural_key : Text -> Vector
to_natural_key text = if text.is_empty then [] else
# An empty text gives an empty key. Otherwise the regex below cuts the text
# into runs made only of the characters 0-9 and runs containing none of them.
splitter_regex = Regex.compile "([^0-9]+|[0-9]+)"
parts = splitter_regex.find text mode=Mode.All

## TODO [RW] Currently there is no `is_digit` method. Once
https://www.pivotaltracker.com/story/show/181176532 is implemented, this
regex can be replaced with a simpler and faster `is_digit` check.
is_integer_regex = Regex.compile "[0-9]+"
# Each run is wrapped in a part: digit runs keep both the parsed value and
# the original text, every other run keeps just its text.
parts.map part-> case is_integer_regex.matches part of
True ->
## TODO [RW] Currently there is no `Integer.parse` method, so we
parse a decimal and convert it to an integer. Once
https://www.pivotaltracker.com/story/show/181176522 is
implemented, this should be changed to use `Integer.parse`.
value = Decimal.parse part . floor
Natural_Ordering_Numeric_Part value part
False ->
Natural_Ordering_Alphabetical_Part part

## PRIVATE
type Natural_Ordering_Part
## PRIVATE
type Natural_Ordering_Alphabetical_Part (text : Text)

## PRIVATE
type Natural_Ordering_Numeric_Part (value : Integer) (original_text : Text)

## PRIVATE
compare_to : Natural_Ordering_Part -> Ordering
compare_to that = case Pair this that of
# Two alphabetical parts: decided by comparing their texts.
Pair (Natural_Ordering_Alphabetical_Part text1) (Natural_Ordering_Alphabetical_Part text2) ->
text1 . compare_to text2
# Two numeric parts: decided by value; when the values are equal the
# original texts are compared instead.
Pair (Natural_Ordering_Numeric_Part value1 original1) (Natural_Ordering_Numeric_Part value2 original2) ->
value_ordering = value1.compare_to value2
case value_ordering of
Ordering.Equal -> original1 . compare_to original2
_ -> value_ordering
# Mixed kinds: a numeric part is always Less than an alphabetical part.
Pair (Natural_Ordering_Numeric_Part _ _) (Natural_Ordering_Alphabetical_Part _) ->
Ordering.Less
Pair (Natural_Ordering_Alphabetical_Part _) (Natural_Ordering_Numeric_Part _ _) ->
Ordering.Greater

# Text form: the plain text for an alphabetical part, and the text of the
# vector [value, original_text] for a numeric part.
to_text : Text
to_text = case this of
Natural_Ordering_Alphabetical_Part text -> text.to_text
Natural_Ordering_Numeric_Part v o -> [v, o].to_text
# Body of `compare text1 text2`: walks both texts in lock-step, one character
# boundary at a time, without building intermediate keys.
# Each text gets its own ICU BreakIterator (character instance); the offsets
# it returns are used below to slice the texts with Text_Utils.substring.
iter1 = BreakIterator.getCharacterInstance
iter1.setText text1

iter2 = BreakIterator.getCharacterInstance
iter2.setText text2

# True when the code `c` lies in 48..57, i.e. ASCII '0'..'9'.
is_digit=c -> c>=48 && c<=57

# Reads the digit run of `text` that begins at offset `prev`; `next` is the
# boundary after the run's first character. Returns [s, d, n, i]:
#  - s: the text of the run,
#  - d: its parsed numeric value,
#  - n: the offset where the run ends, negated if the text ended there,
#  - i: the iterator position after the scan, or -1 if the text ended.
get_number text prev next iter =
# Advances `iter` while the character just stepped over starts with a digit.
# A result of -1 from `iter.next` is treated as "no more text".
find_number text next iter =
new_next = iter.next
if (new_next == -1) then -next else
substring = Text_Utils.substring text next new_next
c = Text_Utils.get_chars substring . at 0
if (is_digit c).not then next else
@Tail_Call find_number text new_next iter

n = find_number text next iter
# `n.abs` undoes the end-of-text negation so the slice covers the whole run.
s = Text_Utils.substring text prev n.abs
# NOTE(review): the value is obtained via Decimal.parse, so very long digit
# runs may lose precision if Decimal is a floating-point type — confirm.
d = Decimal.parse s . floor
i = if n < 0 then -1 else iter.current
[s, d, n, i]


## Ordering: Nothing < Number < Text
order prev1 next1 prev2 next2 =
# `prevK`..`nextK` delimit the current character of textK; `nextK == -1`
# means textK has been fully consumed.
case (next1 == -1) of
True ->
if (next2 == -1) then Ordering.Equal else Ordering.Less
False ->
if (next2 == -1) then Ordering.Greater else
s1 = Text_Utils.substring text1 prev1 next1
c1 = Text_Utils.get_chars s1 . at 0

s2 = Text_Utils.substring text2 prev2 next2
c2 = Text_Utils.get_chars s2 . at 0

case (is_digit c1) of
True ->
if (is_digit c2).not then Ordering.Less else
# Both sides start a digit run: consume both runs in full.
a1 = get_number text1 prev1 next1 iter1
a2 = get_number text2 prev2 next2 iter2

# Decide by numeric value first, then by the run's text; if both
# agree, continue from the positions reported by get_number.
if (a1.at 1) != (a2.at 1) then (a1.at 1).compare_to (a2.at 1) else
if (a1.at 0) != (a2.at 0) then (a1.at 0).compare_to (a2.at 0) else
@Tail_Call order (a1.at 2) (a1.at 3) (a2.at 2) (a2.at 3)
False ->
if (is_digit c2) then Ordering.Greater else
# Both sides are non-digits: compare the two characters and, when
# they are equal, step both iterators forward by one character.
if s2 != s1 then s1.compare_to s2 else
@Tail_Call order next1 iter1.next next2 iter2.next

# Start at offset 0 of both texts with the first boundary of each iterator.
order 0 iter1.next 0 iter2.next
38 changes: 38 additions & 0 deletions distribution/lib/Standard/Test/0.0.0-dev/src/Faker.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from Standard.Base import all

polyglot java import java.util.Random
polyglot java import org.enso.base.Text_Utils

# Candidate sets for `make_string` templates, stored as the result of
# `Text.utf_8` so that individual entries can be picked by index.
# The 26 upper-case ASCII letters.
upper_case_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf_8
# The ten ASCII digits.
numbers = "0123456789".utf_8

## Builds a `java.util.Random` that tests can draw values from.

   Arguments:
   - seed: Seed used to make the generated sequence reproducible. The default
     value `0` means "no seed": an unseeded `Random` is created instead.
make_generator : Integer -> Random
make_generator (seed = 0) =
    has_seed = seed != 0
    if has_seed then Random.new seed else Random.new


## Builds a random text by choosing one entry per position from a template.

   Arguments:
   - template: Vector whose i-th element holds the candidate characters (in
     the form returned by `Text.utf_8`) for the i-th position of the result.
   - generator: Random number generator; its `nextInt` selects the candidate
     for each position.

   > Example
     Creates a fake UK National Insurance number:

         l = "ABCEGHJKLMNOPRSTWXYZ".utf_8
         n = "0123456789".utf_8
         s = "ABCDFMP ".utf_8
         template = [l, l, n, n, n, n, n, s]
         ni_number = make_string template make_generator
make_string : Vector -> Any -> Text
make_string template generator =
    size = template.length
    bytes = Array.new size
    0.up_to size . each position->
        choices = template.at position
        picked = generator.nextInt choices.length
        bytes.set_at position (choices.at picked)
    Text_Utils.from_utf_8 bytes

0 comments on commit 61b18fd

Please sign in to comment.