Skip to content

Commit

Permalink
feat: sort mappings using a pre-defined alphabetical ordering on thei…
Browse files Browse the repository at this point in the history
…r pronunciation fields,

instead of putting all the accented pronunciation characters at the end
  • Loading branch information
CicadaCinema committed Aug 6, 2023
1 parent 5bd5c42 commit 9c94e81
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 10 deletions.
145 changes: 135 additions & 10 deletions lib/src/data_structures/data_structures.dart
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import 'dart:collection';
import 'dart:math';

import 'package:autoglosser/src/linkedlist_mapping_converter.dart';
import 'package:collection/collection.dart';
Expand All @@ -14,6 +15,95 @@ import '../linkedlist_word_converter.dart';

part 'data_structures.g.dart';

/// Collation rank of every character recognised when ordering pronunciations.
///
/// A lower rank sorts earlier. Each base letter is listed as uppercase then
/// lowercase; for vowels, the tone-marked variants follow immediately in the
/// order macron, acute, caron, grave, again uppercase before lowercase. `Ü`/`ü`
/// and their tone-marked variants sit between `u` and `V`.
///
/// Every lowercase tone-marked vowel has an uppercase counterpart here, so a
/// capitalised pronunciation is ranked next to its lowercase form instead of
/// being treated as an unmapped character.
///
/// Characters absent from this map are ordered after all mapped characters by
/// [Mapping.compareTo].
///
/// Entries are declared in ascending rank order, so iterating [Map.keys]
/// yields the characters in collation order.
const Map<String, int> characterMap = {
  'A': 1,
  'a': 2,
  'Ā': 3,
  'ā': 4,
  'Á': 5,
  'á': 6,
  'Ǎ': 7,
  'ǎ': 8,
  'À': 9,
  'à': 10,
  'B': 11,
  'b': 12,
  'C': 13,
  'c': 14,
  'D': 15,
  'd': 16,
  'E': 17,
  'e': 18,
  'Ē': 19,
  'ē': 20,
  'É': 21,
  'é': 22,
  'Ě': 23,
  'ě': 24,
  'È': 25,
  'è': 26,
  'F': 27,
  'f': 28,
  'G': 29,
  'g': 30,
  'H': 31,
  'h': 32,
  'I': 33,
  'i': 34,
  'Ī': 35,
  'ī': 36,
  'Í': 37,
  'í': 38,
  'Ǐ': 39,
  'ǐ': 40,
  'Ì': 41,
  'ì': 42,
  'J': 43,
  'j': 44,
  'K': 45,
  'k': 46,
  'L': 47,
  'l': 48,
  'M': 49,
  'm': 50,
  'N': 51,
  'n': 52,
  'O': 53,
  'o': 54,
  'Ō': 55,
  'ō': 56,
  'Ó': 57,
  'ó': 58,
  'Ǒ': 59,
  'ǒ': 60,
  'Ò': 61,
  'ò': 62,
  'P': 63,
  'p': 64,
  'Q': 65,
  'q': 66,
  'R': 67,
  'r': 68,
  'S': 69,
  's': 70,
  'T': 71,
  't': 72,
  'U': 73,
  'u': 74,
  'Ū': 75,
  'ū': 76,
  'Ú': 77,
  'ú': 78,
  'Ǔ': 79,
  'ǔ': 80,
  'Ù': 81,
  'ù': 82,
  'Ü': 83,
  'ü': 84,
  'Ǖ': 85,
  'ǖ': 86,
  'Ǘ': 87,
  'ǘ': 88,
  'Ǚ': 89,
  'ǚ': 90,
  'Ǜ': 91,
  'ǜ': 92,
  'V': 93,
  'v': 94,
  'W': 95,
  'w': 96,
  'X': 97,
  'x': 98,
  'Y': 99,
  'y': 100,
  'Z': 101,
  'z': 102,
};

// TODO: get rid of this and use the enum below instead, like in the settings page
const breakKinds = [
'no break',
Expand Down Expand Up @@ -142,7 +232,8 @@ class FullText {
}

@JsonSerializable()
final class Mapping extends LinkedListEntry<Mapping> {
final class Mapping extends LinkedListEntry<Mapping>
implements Comparable<Mapping> {
// TODO: remove these three setters (make them private instead) and force the user to go through the [FullMap] interface.
String pronounciation;
String source;
Expand Down Expand Up @@ -172,6 +263,42 @@ final class Mapping extends LinkedListEntry<Mapping> {
_$MappingFromJson(json);

Map<String, dynamic> toJson() => _$MappingToJson(this);

/// Orders this mapping relative to [other] by their pronunciation strings.
///
/// The strings are compared position by position (each position is one
/// UTF-16 code unit, as returned by `String.operator []`), looking up each
/// character's rank in [characterMap]:
///
/// * both characters ranked: the lower rank comes first;
/// * exactly one ranked: the ranked character comes first;
/// * neither ranked: fall back to [String.compareTo] on the two characters.
///
/// If every shared position compares equal, the shorter pronunciation comes
/// first. Returns a negative number, zero, or a positive number accordingly.
@override
int compareTo(Mapping other) {
  final mine = pronounciation;
  final theirs = other.pronounciation;

  // Only positions present in both strings can be compared pairwise.
  final sharedLength = min(mine.length, theirs.length);

  for (var index = 0; index < sharedLength; index++) {
    final myChar = mine[index];
    final theirChar = theirs[index];
    final myRank = characterMap[myChar];
    final theirRank = characterMap[theirChar];

    final int verdict;
    if (myRank != null && theirRank != null) {
      // Both characters are known: the custom ranks decide.
      verdict = myRank.compareTo(theirRank);
    } else if (myRank != null) {
      // Only `other` has an unknown character, so `this` sorts first.
      verdict = -1;
    } else if (theirRank != null) {
      // Only `this` has an unknown character, so `other` sorts first.
      verdict = 1;
    } else {
      // Neither character is known: defer to the built-in string ordering.
      verdict = myChar.compareTo(theirChar);
    }

    // The first differing position settles the comparison.
    if (verdict != 0) {
      return verdict;
    }
  }

  // One pronunciation is a prefix of the other (or they are identical).
  // The shorter one sorts first, so the sign of the length difference is the
  // answer: positive when `this` is the longer string.
  return mine.length - theirs.length;
}
}

@JsonSerializable(
Expand Down Expand Up @@ -284,7 +411,7 @@ class FullMap {
// have been migrated (overwritten with a version where this field is sorted).
keyOfMapping(Mapping mapping) => mapping.pronounciation;
for (final section in mappingSections.values) {
if (section.isSortedBy(keyOfMapping)) {
if (section.isSorted((a, b) => a.compareTo(b))) {
// If this section is sorted, we don't have to do anything.
continue;
}
Expand All @@ -296,7 +423,7 @@ class FullMap {
}

// Sort the array, then add back the mapping elements to the linked list.
sectionList.sortBy(keyOfMapping);
sectionList.sort();
assert(section.isEmpty);
section.addAll(sectionList);
}
Expand All @@ -319,10 +446,9 @@ class FullMap {
}

extension InsertMappingIntoLinkedList on LinkedList<Mapping> {
/// Assuming that this [LinkedList] is sorted by applying the default compare
/// function to [Mapping.pronounciation], insert [mapping] into the linked
/// list, preserving the sort order. [mapping] must not already be in any
/// linked list.
/// Assuming that this [LinkedList] is sorted, insert [mapping] into the
/// linked list, preserving the sort order. [mapping] must not already be in
/// any linked list.
void insertPreservingSort(Mapping mapping) {
// If the list is empty to begin with, this operation is trivial.
if (isEmpty) {
Expand All @@ -332,7 +458,7 @@ extension InsertMappingIntoLinkedList on LinkedList<Mapping> {

// A special case is when the new element must be inserted at the beginning
// of the list.
if (mapping.pronounciation.compareTo(first.pronounciation) <= 0) {
if (mapping.compareTo(first) <= 0) {
// In this case, mapping is ordered before first, or they are equivalent.
first.insertBefore(mapping);
return;
Expand All @@ -342,8 +468,7 @@ extension InsertMappingIntoLinkedList on LinkedList<Mapping> {
// We also know that [mapping] must be inserted after [first].

var other = first;
while (other.next != null &&
mapping.pronounciation.compareTo(other.next!.pronounciation) > 0) {
while (other.next != null && mapping.compareTo(other.next!) > 0) {
// Now we know that there is yet another element in the linked list after
// other. We also know that [mapping] must be inserted after [other.next],
// so it is certainly not inserted immediately after [other].
Expand Down
107 changes: 107 additions & 0 deletions test/data_structures_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -391,4 +391,111 @@ void main() {
);
},
);

// Checks [Mapping.compareTo] in two stages:
//
// 1. For every pair of zero-, one- and two-letter words built from the keys
//    of [characterMap], the sign of `compareTo` must match an independent
//    reference comparator.
// 2. After appending words made of characters outside [characterMap], sorting
//    the reversed list must restore the original construction order.
//
// NOTE: despite the wording of the description string, stage 2 does exercise
// characters outside the mapping. The string is left untouched so the test
// keeps its existing name.
test(
  'Mappings should be sorted using our custom mapping. This does not test words which use letters outside the mapping.',
  () {
    // Reference comparator adapted from
    // https://stackoverflow.com/a/56118115/14464173
    //
    // Both arguments must consist solely of characters in [characterMap];
    // the `!` lookups fail loudly if that precondition is violated instead of
    // silently reusing a rank from a previous iteration.
    int compareGroundTruthStackoverflow(String a, String b) {
      final sharedLength = a.length < b.length ? a.length : b.length;
      for (var i = 0; i < sharedLength; ++i) {
        final charAint = characterMap[a[i]]!;
        final charBint = characterMap[b[i]]!;
        if (charAint > charBint) {
          return 1;
        } else if (charAint < charBint) {
          return -1;
        }
      }
      if (a.length < b.length) {
        return -1;
      } else if (a.length > b.length) {
        return 1;
      }
      return 0;
    }

    // The letters available to use.
    final letters = characterMap.keys.toList();

    // This list is populated with all zero-, one- and two-letter words, in
    // the order we expect them to be sorted.
    final testWords = <String>[''];
    for (final letter1 in letters) {
      testWords.add(letter1);
      testWords.addAll(letters.map((letter2) => letter1 + letter2));
    }

    // Generate a mapping from each word. Materialise the list once: a lazy
    // iterable would rebuild every [Mapping] on each pass of the nested loop
    // below.
    final testMappings = testWords
        .map((w) => Mapping(
              source: '',
              translation: [''],
              pronounciation: w,
            ))
        .toList();

    // Ensure our result matches the result of the ground truth sorting function.
    for (final mapping1 in testMappings) {
      for (final mapping2 in testMappings) {
        expect(
          mapping1.compareTo(mapping2).sign,
          equals(compareGroundTruthStackoverflow(
            mapping1.pronounciation,
            mapping2.pronounciation,
          ).sign),
          reason: 'comparing "${mapping1.pronounciation}" with '
              '"${mapping2.pronounciation}"',
        );
      }
    }

    // Add some words from outside the mapping. They are listed in the order
    // produced by the default [String] comparison and must all sort after the
    // words above.
    testWords.addAll([
      '!',
      '!!',
      '!*',
      '!+',
      '!-',
      '*',
      '*!',
      '**',
      '*+',
      '*-',
      '+',
      '+!',
      '+*',
      '++',
      '+-',
      '-',
      '-!',
      '-*',
      '-+',
      '--',
    ]);

    // Reverse the mappings, then sort them.
    final reversedMappings = testWords.reversed
        .map((w) => Mapping(
              source: '',
              translation: [''],
              pronounciation: w,
            ))
        .toList();
    reversedMappings.sort();

    // We expect to get back the list of words in sorted order (the order in
    // which the list of words was created).
    expect(
      const ListEquality().equals(
        reversedMappings.map((m) => m.pronounciation).toList(),
        testWords,
      ),
      isTrue,
      reason: 'sorting the reversed words should restore construction order',
    );
  },
);
}

0 comments on commit 9c94e81

Please sign in to comment.