Skip to content

Commit

Permalink
Geoip database update implementation (opensearch-project#4105)
Browse files Browse the repository at this point in the history
* Geoip processor implementation

Signed-off-by: Asif Sohail Mohammed <[email protected]>
  • Loading branch information
asifsmohammed authored Feb 21, 2024
1 parent 680ad7a commit bb494de
Show file tree
Hide file tree
Showing 68 changed files with 3,533 additions and 1,461 deletions.
142 changes: 43 additions & 99 deletions data-prepper-plugins/geoip-processor/build.gradle
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

plugins{
id 'de.undercouch.download' version '4.1.2'
id 'de.undercouch.download' version '5.5.0'
}
apply plugin: 'de.undercouch.download'

import de.undercouch.gradle.tasks.download.Download

/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

dependencies {
implementation project(':data-prepper-api')
implementation project(path: ':data-prepper-plugins:common')
Expand All @@ -19,131 +19,75 @@ dependencies {
implementation libs.commons.compress
implementation 'org.mapdb:mapdb:3.0.8'
implementation libs.commons.io
implementation 'software.amazon.awssdk:aws-sdk-java:2.20.67'
implementation 'software.amazon.awssdk:sts'
implementation 'software.amazon.awssdk:s3-transfer-manager'
implementation 'software.amazon.awssdk.crt:aws-crt:0.21.17'
implementation 'software.amazon.awssdk.crt:aws-crt:0.29.9'
implementation 'com.maxmind.geoip2:geoip2:4.0.1'
implementation 'com.maxmind.db:maxmind-db:3.0.0'
implementation 'org.hibernate.validator:hibernate-validator:8.0.1.Final'

implementation libs.commons.lang3

testImplementation project(':data-prepper-core')
testImplementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310'
testImplementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml'
testImplementation project(':data-prepper-test-common')
}
def geoIP2='GeoIP2'
def geoLite2= 'GeoLite2'
task downloadFile(type: Download) {

def urls = [
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-City-Test.mmdb',
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-Country-Test.mmdb',
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoLite2-ASN-Test.mmdb'
]
def mmdbFileExtension = '.mmdb'
def baseDirPath = 'src/test/resources/mmdb-file/geo-lite2/'

urls.each { url ->
src(url)
dest(baseDirPath)
doLast {

def testFileName = url.substring(url.lastIndexOf('/') + 1)
def testMmdbSubString = testFileName.substring(testFileName.lastIndexOf('-'))
def fileName = testFileName.substring(0, testFileName.length() - testMmdbSubString.length())
def downloadFiles = tasks.register('downloadFiles')

if(fileName.contains(geoIP2)) {
fileName = fileName.replace(geoIP2, geoLite2)
}
File sourceFile = file(baseDirPath+testFileName)
File destinationFile = file( baseDirPath+fileName+mmdbFileExtension)
sourceFile.renameTo(destinationFile)
def databaseNames = [
'GeoLite2-City-Test',
'GeoLite2-Country-Test',
'GeoLite2-ASN-Test'
]

}
databaseNames.forEach { databaseName -> {

def url = "https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/${databaseName}.mmdb"
def gradleName = databaseName.replaceAll('-', '')
def downloadTask = tasks.register("download${gradleName}", Download) {
src(url)
dest "build/resources/test/mmdb-files/geo-lite2/${databaseName}.mmdb"
overwrite true
}
downloadFiles.get().dependsOn downloadTask
}}

def enterpriseDatabaseNames = [
'GeoIP2-Enterprise-Test'
]

}
task downloadEnterpriseFile(type: Download) {
dependsOn downloadFile
def urls = [
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-Enterprise-Test.mmdb'
]
def mmdbFileExtension = '.mmdb'
def baseDirPath = 'src/test/resources/mmdb-file/geo-enterprise/'

urls.each { url ->
src(url)
def testFileName = url.substring(url.lastIndexOf('/') + 1)
def testMmdbSubString = testFileName.substring(testFileName.lastIndexOf('-'))
def fileName = testFileName.substring(0, testFileName.length() - testMmdbSubString.length())

dest(baseDirPath+testFileName)
doLast {
if(fileName.contains(geoIP2)) {
fileName = fileName.replace(geoIP2, geoLite2)
}
File sourceFile = file(baseDirPath+testFileName)
File destinationFile = file( baseDirPath+fileName+mmdbFileExtension)
sourceFile.renameTo(destinationFile)
}
enterpriseDatabaseNames.forEach { enterpriseDatabaseName -> {

def url = "https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/${enterpriseDatabaseName}.mmdb"
def gradleName = enterpriseDatabaseName.replaceAll('-', '')
def downloadEnterpriseTask = tasks.register("download${gradleName}", Download) {
src(url)
dest "build/resources/test/mmdb-files/geo-ip2/${enterpriseDatabaseName}.mmdb"
overwrite true
}
downloadFiles.get().dependsOn downloadEnterpriseTask
}}

}

/*task processTestResources(type: Copy) {
dependsOn downloadEnterpriseFile
from 'src/test/resources' // Source directory containing test resources
into 'build/resources/test' // Destination directory for processed test resources
}*/
tasks.test.dependsOn 'processTestResources'
tasks.processTestResources.dependsOn 'downloadEnterpriseFile'
test {
useJUnitPlatform()
dependsOn(downloadFiles)
}

checkstyleTest {
dependsOn(downloadFiles)
}

jacocoTestCoverageVerification {
dependsOn jacocoTestReport
violationRules {
rule {
limit {
minimum = 0.1 // temporarily reduce coverage for the builds to pass
minimum = 0.85
}
}
}
}

check.dependsOn jacocoTestCoverageVerification

sourceSets {
integrationTest {
java {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
srcDir file('src/integrationTest/java')
}
resources.srcDir file('src/integrationTest/resources')
}
}

configurations {
integrationTestImplementation.extendsFrom testImplementation
integrationTestRuntime.extendsFrom testRuntime
}

task integrationTest(type: Test) {
group = 'verification'
testClassesDirs = sourceSets.integrationTest.output.classesDirs

useJUnitPlatform()

classpath = sourceSets.integrationTest.runtimeClasspath
systemProperty 'tests.geoipProcessor.maxmindLicenseKey', System.getProperty('tests.geoipProcessor.maxmindLicenseKey')

filter {
includeTestsMatching '*IT'
}
}
check.dependsOn jacocoTestCoverageVerification
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void setUp() throws JsonProcessingException {

public GeoIPProcessorService createObjectUnderTest() {
// TODO: pass in geoIpServiceConfig object
return new GeoIPProcessorService(null);
return new GeoIPProcessorService(null, null, null);
}

@Test
Expand All @@ -93,7 +93,7 @@ void verify_enrichment_of_data_from_maxmind_url() throws UnknownHostException {
if (IPValidationCheck.isPublicIpAddress(ipAddress)) {
InetAddress inetAddress = InetAddress.getByName(ipAddress);
//All attributes are considered by default with the null value
geoData = geoIPProcessorService.getGeoData(inetAddress, null);
// geoData = geoIPProcessorService.getGeoData(inetAddress, null);

assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("ip"), equalTo("8.8.8.8"));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor;

/**
 * Kinds of GeoIP database known to the processor.
 *
 * <p>Constant order is part of the enum's observable behavior
 * ({@code values()} / {@code ordinal()}) and is kept as declared.
 */
public enum GeoIPDatabase {
    /** City-level database. */
    CITY,

    /** Country-level database. */
    COUNTRY,

    /** Autonomous system number (ASN) database. */
    ASN,

    /** Enterprise database. */
    ENTERPRISE
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * Output fields that can be produced by a GeoIP lookup, each paired with its
 * external field name and the set of {@link GeoIPDatabase} kinds associated with it.
 */
public enum GeoIPField {
    CONTINENT_CODE("continent_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    CONTINENT_NAME("continent_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_NAME("country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    IS_COUNTRY_IN_EUROPEAN_UNION("is_country_in_european_union", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_ISO_CODE("country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_CONFIDENCE("country_confidence", GeoIPDatabase.ENTERPRISE),
    REGISTERED_COUNTRY_NAME("registered_country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REGISTERED_COUNTRY_ISO_CODE("registered_country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_NAME("represented_country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_ISO_CODE("represented_country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_TYPE("represented_country_type", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    CITY_NAME("city_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    CITY_CONFIDENCE("city_confidence", GeoIPDatabase.ENTERPRISE),
    LOCATION("location", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LATITUDE("latitude", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LONGITUDE("longitude", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LOCATION_ACCURACY_RADIUS("location_accuracy_radius", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    METRO_CODE("metro_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    TIME_ZONE("time_zone", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    POSTAL_CODE("postal_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    POSTAL_CODE_CONFIDENCE("postal_code_confidence", GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_NAME("most_specified_subdivision_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_ISO_CODE("most_specified_subdivision_iso_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_CONFIDENCE("most_specified_subdivision_confidence", GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_NAME("least_specified_subdivision_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_ISO_CODE("least_specified_subdivision_iso_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_CONFIDENCE("least_specified_subdivision_confidence", GeoIPDatabase.ENTERPRISE),

    ASN("asn", GeoIPDatabase.ASN),
    ASN_ORGANIZATION("asn_organization", GeoIPDatabase.ASN),
    NETWORK("network", GeoIPDatabase.ASN),
    IP("ip", GeoIPDatabase.ASN);

    // Enum constants are JVM-wide singletons, so this set is stored as an
    // unmodifiable view: a caller must not be able to alter the mapping for everyone.
    private final Set<GeoIPDatabase> geoIPDatabases;
    private final String fieldName;

    /**
     * @param fieldName      external name of the field, as matched by {@link #findByName(String)}
     * @param geoIPDatabases database kinds associated with this field
     */
    GeoIPField(final String fieldName, final GeoIPDatabase... geoIPDatabases) {
        this.fieldName = fieldName;
        this.geoIPDatabases = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(geoIPDatabases)));
    }

    /**
     * Looks up a field by its external name, ignoring case.
     *
     * @param name the field name to search for; may be {@code null}
     * @return the first constant (in declaration order) whose field name matches,
     *         or {@code null} when nothing matches or {@code name} is {@code null}
     */
    public static GeoIPField findByName(final String name) {
        for (final GeoIPField geoIPField : values()) {
            // equalsIgnoreCase(null) is false, so a null name falls through to "not found".
            if (geoIPField.getFieldName().equalsIgnoreCase(name)) {
                return geoIPField;
            }
        }
        return null;
    }

    /**
     * @return the external name of this field
     */
    public String getFieldName() {
        return fieldName;
    }

    /**
     * @return an unmodifiable set of the database kinds associated with this field;
     *         attempts to modify it throw {@link UnsupportedOperationException}
     */
    public Set<GeoIPDatabase> getGeoIPDatabases() {
        return geoIPDatabases;
    }
}
Loading

0 comments on commit bb494de

Please sign in to comment.