-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from hmrc/TAV-242B
TAV-242: Remove address-reputation-store dependency.
- Loading branch information
Showing
81 changed files
with
3,004 additions
and
149 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
/* | ||
* Copyright 2021 HM Revenue & Customs | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package address.osgb | ||
|
||
import java.util | ||
import java.lang.{Integer => JInteger} | ||
import java.lang.{Short => JShort} | ||
|
||
import address.uk.Postcode | ||
|
||
import scala.annotation.tailrec | ||
import scala.collection.mutable | ||
|
||
/**
 * A record that can be flattened into key/value pairs.
 */
trait Document {
  /** The fields of this document as an ordered list of key/value pairs. */
  def tupled: List[(String, Any)]

  /**
   * The pairs from `tupled` collected into an immutable map.
   * If the same key occurs more than once, the last pair wins.
   */
  final def toMap: Map[String, Any] = tupled.toMap

  /** Returns the normalised form of this document; what that means is left to each implementation. */
  def normalise: Document
}
|
||
/**
 * Address typically represents a postal address.
 * For UK addresses, 'town' will always be present.
 * For non-UK addresses, 'town' may be absent and there may be an extra line instead.
 *
 * The id typically consists of some prefix and the uprn.
 */
case class DbAddress(
  id: String,
  lines: List[String],
  town: Option[String],
  postcode: String,
  subdivision: Option[String],
  country: Option[String],
  localCustodianCode: Option[Int],
  language: Option[String],
  blpuState: Option[Int],
  logicalState: Option[Int],
  streetClass: Option[Int],
  blpuClass: Option[String],
  location: Option[String],
  poBox: Option[String] = None,
  administrativeArea: Option[String] = None
) extends Document {

  /**
   * The numeric part of `id`: everything from the first digit onwards, parsed as a Long.
   * UPRN is specified to be an integer of up to 12 digits (it can also be assumed to be always positive).
   * Throws NumberFormatException when `id` contains no digit or the remainder is not a valid Long.
   */
  def uprn: Long = DbAddress.trimLeadingLetters(id).toLong

  /** True when at least one address line contains `filterStr`, comparing the upper-cased forms. */
  def linesContainIgnoreCase(filterStr: String): Boolean = {
    val filter = filterStr.toUpperCase
    lines.exists(_.toUpperCase.contains(filter))
  }

  /** First address line, or "" when there is none. */
  def line1: String = lineAt(0)

  /** Second address line, or "" when there is none. */
  def line2: String = lineAt(1)

  /** Third address line, or "" when there is none. */
  def line3: String = lineAt(2)

  // `lift` gives None past the end of the list, so no separate size check is needed.
  private def lineAt(index: Int): String = lines.lift(index).getOrElse("")

  /** `location` converted by `LatLong.apply`; None when no location is held. */
  def latLong: Option[LatLong] = LatLong(location)

  // For use as input to MongoDbObject (hence it's not a Map)
  def tupled: List[(String, Any)] =
    List[(String, Any)]("lines" -> lines, "postcode" -> postcode) ++ optionalFields

  // We're still providing two structures for the lines, pending a decision on how ES will be used.
  def tupledFlat: List[(String, Any)] = {
    // At most the first three lines are emitted, keyed "line1", "line2", "line3".
    val numberedLines = lines.take(3).zipWithIndex.map { case (line, i) => s"line${i + 1}" -> line }
    List[(String, Any)]("postcode" -> postcode) ++ numberedLines ++ optionalFields
  }

  // The optional fields shared by `tupled` and `tupledFlat`, in their fixed output order.
  // Absent (None) fields are omitted altogether rather than emitted with an empty value.
  private def optionalFields: List[(String, Any)] =
    List[Option[(String, Any)]](
      town.map("town" -> _),
      subdivision.map("subdivision" -> _),
      country.map("country" -> _),
      localCustodianCode.map("localCustodianCode" -> _),
      language.map("language" -> _),
      blpuState.map("blpuState" -> _),
      logicalState.map("logicalState" -> _),
      streetClass.map("streetClass" -> _),
      blpuClass.map("blpuClass" -> _),
      location.map("location" -> _),
      poBox.map("poBox" -> _),
      administrativeArea.map("administrativeArea" -> _)
    ).flatten

  /** The `tupled` pairs followed by the `_id` entry. */
  def forMongoDb: List[(String, Any)] = tupled :+ ("_id" -> id)

  // Result type is whatever Postcode.apply yields; Postcode is declared outside this file.
  def splitPostcode = Postcode(postcode)

  /** A DbAddress is treated as already normalised, so this returns the same instance. */
  def normalise: DbAddress = this
}
|
||
|
||
object DbAddress {

  import scala.collection.JavaConverters._

  final val English = "en"
  final val Cymraeg = "cy"

  // Coerces a loosely-typed value to Int; anything that is not an Int, Integer or Short
  // is parsed from its string form (NumberFormatException if that is not numeric).
  private def toInteger(v: Any): Int = v match {
    case n: Int      => n
    case n: JInteger => n.intValue
    case n: JShort   => n.intValue
    case other       => other.toString.toInt
  }

  // Coerces a loosely-typed value to Float, parsing the string form of anything that is not a Float.
  private def toFloat(v: Any): Float = v match {
    case x: Float => x
    case other    => other.toString.toFloat
  }

  // Turns a Java or Scala list into a List[String] via each element's toString.
  // Any other kind of value is not matched and results in a MatchError.
  private def convertLines(lines: AnyRef): List[String] = lines match {
    case javaList: util.List[_] => javaList.asScala.toList.map(_.toString)
    case scalaList: List[_]     => scalaList.map(_.toString)
  }

  /**
   * Drops everything before the first digit of `id` (despite the name, any non-digit
   * character is dropped, not only letters). Yields "" when `id` contains no digit.
   */
  private[osgb] def trimLeadingLetters(id: String): String =
    id.dropWhile(ch => !Character.isDigit(ch))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/* | ||
* Copyright 2021 HM Revenue & Customs | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package address.osgb | ||
|
||
/** Orders addresses by their first address line, using plain String comparison. */
object DbAddressOrderingByLine1 extends Ordering[DbAddress] {
  def compare(a: DbAddress, b: DbAddress): Int = a.line1.compareTo(b.line1)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright 2021 HM Revenue & Customs | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package address.osgb | ||
|
||
import util._ | ||
|
||
/** A latitude/longitude pair. */
case class LatLong(lat: Double, long: Double) {
  /** Renders the pair as "lat,long", each value in its default Double string form. */
  def toLocation: String = s"$lat,$long"
}
|
||
object LatLong {
  /**
   * Parses an optional location string into an optional LatLong; None stays None.
   *
   * The first two pieces produced by `divide(',')` are read as latitude and longitude.
   * NOTE(review): `divide` comes from the `util._` import and is not visible here;
   * presumably it splits the text at the comma - confirm.
   * A present but malformed value is not tolerated: a non-numeric piece throws
   * NumberFormatException, and a missing piece fails on the index lookup.
   */
  def apply(location: Option[String]): Option[LatLong] =
    location.map { text =>
      val parts = text.divide(',')
      LatLong(parts(0).toDouble, parts(1).toDouble)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/* | ||
* Copyright 2021 HM Revenue & Customs | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package address | ||
|
||
package object osgb {

  /**
   * Some(s) for any non-empty string; None for null or "".
   * Whitespace-only strings are not treated as blank and are kept as they are.
   */
  def blankToOption(s: String): Option[String] = Option(s).filter(_.nonEmpty)

  /** Null-tolerant trim: null stays null, anything else is trimmed. */
  def trim(s: String): String = s match {
    case null => null
    case text => text.trim
  }

  /**
   * Trims `s`, then removes one trailing comma (if present) and trims again.
   * Only a single comma is removed; null stays null.
   */
  def removeTrailingCommaAndTrim(s: String): String =
    trim(s) match {
      case null                             => null
      case trimmed if trimmed.endsWith(",") => trimmed.dropRight(1).trim
      case trimmed                          => trimmed
    }

  //  def stripQuotes(s: String): String = {
  //    if (s.startsWith("\"") && s.endsWith("\""))
  //      s.substring(1, s.length - 1).trim
  //    else
  //      s.trim
  //  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
/* | ||
* Copyright 2021 HM Revenue & Customs | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package address.services | ||
|
||
import scala.collection.immutable.HashSet | ||
|
||
|
||
/**
 * Capitalises address lines.
 *
 * A line is lower-cased, split into words on spaces, and each word is split into sub-words on
 * dashes. Every sub-word is capitalised, except that stop words (e.g. "of", "on", "the") stay
 * lower-case unless they are the very first sub-word of the line. Fixed spellings from the
 * special-case tables and the contracted prefixes ("o'", "d'", "a'") apply to every sub-word,
 * wherever it occurs in the line.
 */
object Capitalisation {

  /**
   * Joins the given fragments into a single capitalised address line.
   *
   * Each fragment is trimmed and lower-cased first; runs of spaces collapse to one space and
   * empty fragments contribute nothing.
   *
   * @param phrase the fragments making up the line, in order
   * @return the capitalised line, or "" when there are no words at all
   */
  def normaliseAddressLine(phrase: String*): String = normalise(phrase.map(_.trim.toLowerCase))

  // Expects already lower-cased input, as supplied by normaliseAddressLine.
  private def normalise(phrase: Seq[String]): String = {
    val words: Seq[String] = phrase.flatMap(_.split(' ').filterNot(_.isEmpty))

    words.headOption.fold("") { first =>
      (asFirstWord(first) +: words.tail.map(asOtherWord)).mkString(" ")
    }
  }

  private def splitOnDash(phrase: String): Seq[String] = phrase.split('-')

  // Capitalises one space-delimited word: `capitaliseHead` handles its first dash-separated
  // sub-word, the non-first rule handles all the others.
  private def capitaliseDashedWord(word: String, capitaliseHead: String => String): String = {
    val subwords = splitOnDash(word)
    // A word that yields no sub-words at all (split discards trailing empty strings, so this is
    // a word made only of dashes) is rendered as a single dash.
    subwords.headOption.fold("-") { head =>
      (capitaliseHead(head) +: subwords.tail.map(capitaliseRestOfSubwords)).mkString("-")
    }
  }

  private def asFirstWord(word: String): String = capitaliseDashedWord(word, capitaliseFirstSubword)

  private def asOtherWord(word: String): String = capitaliseDashedWord(word, capitaliseRestOfSubwords)

  // The first sub-word of a line is never left lower-case as a stop word, but it still gets the
  // special-case and contracted-prefix treatment so that "o'connor close" and
  // "12 o'connor close" capitalise the name the same way.
  private def capitaliseFirstSubword(word: String): String = capitaliseSpecialCases(word)

  private def capitaliseRestOfSubwords(word: String): String =
    if (stopWords.contains(word)) word else capitaliseSpecialCases(word)

  // Fixed spellings win over the general rules; acronyms are checked first, then other
  // special cases, and only then the contracted-prefix rule.
  private def capitaliseSpecialCases(lcWord: String): String =
    acronymSpecialCases.get(lcWord)
      .orElse(subwordSpecialCases.get(lcWord))
      .getOrElse(capitaliseWithContractedPrefix(lcWord))

  // Capitalises both the prefix and the remainder of words such as "o'neill" -> "O'Neill".
  private def capitaliseWithContractedPrefix(word: String): String = {
    // For words shorter than two characters the prefix is the whole word, which can never match
    // a two-character contracted prefix, so they fall through to plain capitalisation.
    val (prefix, remainder) = word.splitAt(2)
    if (contractedPrefixes.contains(prefix)) prefix.capitalize + remainder.capitalize
    else word.capitalize
  }

  //-----------------------------------------------------------------------------------------------

  private val stopWords = HashSet(
    // English stop words
    "and", "at", "by", "cum", "in", "next", "of", "on", "the", "to", "under", "upon", "with",
    // "but" isn't included because it's often a proper name too
    // French loan words
    "de", "en", "la", "le",
    // Welsh stop words
    "y", "yr",
    // Gaelic and Cornish stop words
    "an", "na", "nam"
  )

  private val contractedPrefixes = HashSet("a'", "d'", "o'")

  private val subwordSpecialCases = Map(
    "i'anson" -> "I'Anson") // DL3 0RL

  private val acronymSpecialCases = Map(
    "bfpo" -> "BFPO")

}
Oops, something went wrong.