-
Notifications
You must be signed in to change notification settings - Fork 2
/
normalizer.go
128 lines (100 loc) · 3.98 KB
/
normalizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package addy
import "strings"
// normalize looks s up in lookup, keyed by the uppercased form of s, and returns
// the resulting text with the casing selected by normOpts applied. The boolean
// reports whether lookup held an entry for s; on a miss the text that gets cased
// is s itself.
//
// With OptionPreserveCase a miss returns s untouched. A hit is re-cased to mirror
// s: input that equals its own uppercased form yields uppercase, input that equals
// its own lowercased form yields lowercase, and anything else yields title case.
func normalize(s string, lookup map[string]string, normOpts normalizationOptions) (string, bool) {
	replacement, found := lookup[strings.ToUpper(s)]
	casing := normOpts.casingOption

	if casing == OptionPreserveCase {
		// Nothing will be substituted, so the input already carries the casing to keep.
		if !found {
			return s, false
		}

		// Work out which casing the caller used. Mixed input falls through to title
		// case, the only option that contains both upper- and lowercase characters
		// and therefore the likeliest intent.
		switch s {
		case applyNormalizationCasingOption(s, OptionUpperCase):
			casing = OptionUpperCase
		case applyNormalizationCasingOption(s, OptionLowerCase):
			casing = OptionLowerCase
		default:
			casing = OptionTitleCase
		}
	}

	// On a miss the original text is what receives the requested casing.
	if !found {
		replacement = s
	}
	return applyNormalizationCasingOption(replacement, casing), found
}
// applyNormalizationCasingOption returns s re-cased according to o.
//
// OptionUpperCase and OptionLowerCase map to strings.ToUpper and strings.ToLower.
// OptionTitleCase lowercases s and then capitalizes the first letter of each word.
// Any other option returns s unchanged, so an unhandled casing value never
// discards the caller's text.
func applyNormalizationCasingOption(s string, o option) string {
	switch o {
	case OptionUpperCase:
		return strings.ToUpper(s)
	case OptionLowerCase:
		return strings.ToLower(s)
	case OptionTitleCase:
		// NOTE(review): strings.Title is deprecated; its replacement lives in
		// golang.org/x/text/cases, which is outside the standard library, so the
		// call is kept here to avoid introducing a new dependency.
		return strings.Title(strings.ToLower(s))
	default:
		// Previously an unhandled option fell through and produced "".
		return s
	}
}
// NormalizeAddress1 normalizes the directional and suffix keywords found in
// address. It returns the rebuilt address together with the number of tokens
// that were normalized.
//
// Output is uppercased by default; pass a casing option to override that. When
// several casing options are supplied, only the first one takes effect.
func NormalizeAddress1(address string, options ...option) (string, int) {
	normOpts := parseOptions(options...)
	tokens := tokenize(address)

	normalizedCount := 0
	// NOTE(review): the writes to tok.value below reach tokens.join() only if the
	// elements produced by tokenize alias the stored tokens (e.g. pointers) —
	// confirm against the tokenizer.
	for _, tok := range tokens {
		// Separators are carried through untouched.
		if tok.isSeparator {
			continue
		}

		// Directional and suffix keywords are expected not to overlap, so the
		// suffix lookup is attempted only when the directional one misses.
		var changed bool
		if tok.value, changed = normalizeDirectional(tok.value, normOpts); !changed {
			tok.value, changed = normalizeSuffix(tok.value, normOpts)
		}
		if changed {
			normalizedCount++
		}
	}

	// Every non-separator token has been through the normalizers; stitch the
	// pieces back together.
	return tokens.join(), normalizedCount
}
// NormalizeAddress2 normalizes the secondary unit keywords found in address. It
// returns the rebuilt address together with the number of tokens that were
// normalized.
//
// Output is uppercased by default; pass a casing option to override that. When
// several casing options are supplied, only the first one takes effect.
func NormalizeAddress2(address string, options ...option) (string, int) {
	normOpts := parseOptions(options...)
	tokens := tokenize(address)

	normalizedCount := 0
	// NOTE(review): the write to tok.value below reaches tokens.join() only if the
	// elements produced by tokenize alias the stored tokens (e.g. pointers) —
	// confirm against the tokenizer.
	for _, tok := range tokens {
		// Separators are carried through untouched.
		if tok.isSeparator {
			continue
		}

		var changed bool
		tok.value, changed = normalizeSecondaryUnit(tok.value, normOpts)
		if changed {
			normalizedCount++
		}
	}

	// Every non-separator token has been through the normalizer; stitch the
	// pieces back together.
	return tokens.join(), normalizedCount
}