Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed how geoip cache is integrated with geoip processor. #68581

Merged
merged 3 commits into from
Feb 11, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,26 @@
package org.elasticsearch.ingest.geoip;

import com.maxmind.geoip2.DatabaseReader;
import com.maxmind.geoip2.exception.AddressNotFoundException;
import com.maxmind.geoip2.model.AsnResponse;
import com.maxmind.geoip2.model.CityResponse;
import com.maxmind.geoip2.model.CountryResponse;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.core.internal.io.IOUtils;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Objects;

/**
Expand All @@ -31,14 +39,16 @@ class DatabaseReaderLazyLoader implements Closeable {

private static final Logger LOGGER = LogManager.getLogger(DatabaseReaderLazyLoader.class);

private final GeoIpCache cache;
private final Path databasePath;
private final CheckedSupplier<DatabaseReader, IOException> loader;
final SetOnce<DatabaseReader> databaseReader;

// cache the database type so that we do not re-read it on every pipeline execution
final SetOnce<String> databaseType;

DatabaseReaderLazyLoader(final Path databasePath, final CheckedSupplier<DatabaseReader, IOException> loader) {
DatabaseReaderLazyLoader(final GeoIpCache cache, final Path databasePath, final CheckedSupplier<DatabaseReader, IOException> loader) {
this.cache = cache;
this.databasePath = Objects.requireNonNull(databasePath);
this.loader = Objects.requireNonNull(loader);
this.databaseReader = new SetOnce<>();
Expand Down Expand Up @@ -123,7 +133,49 @@ InputStream databaseInputStream() throws IOException {
return Files.newInputStream(databasePath);
}

DatabaseReader get() throws IOException {
CityResponse getCity(InetAddress ipAddress) {
SpecialPermission.check();
return AccessController.doPrivileged((PrivilegedAction<CityResponse>) () ->
cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
try {
return get().city(ip);
} catch (AddressNotFoundException e) {
throw new GeoIpProcessor.AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));
}

CountryResponse getCountry(InetAddress ipAddress) {
SpecialPermission.check();
return AccessController.doPrivileged((PrivilegedAction<CountryResponse>) () ->
cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
try {
return get().country(ip);
} catch (AddressNotFoundException e) {
throw new GeoIpProcessor.AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));
}

AsnResponse getAsn(InetAddress ipAddress) {
SpecialPermission.check();
return AccessController.doPrivileged((PrivilegedAction<AsnResponse>) () ->
cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
try {
return get().asn(ip);
} catch (AddressNotFoundException e) {
throw new GeoIpProcessor.AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe extract common functionality to separate method:

private <T extends AbstractResponse> T getResponse(InetAddress ipAddress, CheckedBiFunction<DatabaseReader, InetAddress, T,
    Exception> responseProvider) {
    SpecialPermission.check();
    return AccessController.doPrivileged((PrivilegedAction<T>) () ->
        cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
            try {
                return responseProvider.apply(get(), ipAddress);
            } catch (AddressNotFoundException e) {
                throw new GeoIpProcessor.AddressNotFoundRuntimeException(e);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }));
}

CityResponse getCity(InetAddress ipAddress) {
    return getResponse(ipAddress, DatabaseReader::city);
}

CountryResponse getCountry(InetAddress ipAddress) {
    return getResponse(ipAddress, DatabaseReader::country);
}

AsnResponse getAsn(InetAddress ipAddress) {
    return getResponse(ipAddress, DatabaseReader::asn);
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea!


private DatabaseReader get() throws IOException {
if (databaseReader.get() == null) {
synchronized (databaseReader) {
if (databaseReader.get() == null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.ingest.geoip;

import com.maxmind.db.NodeCache;
import com.maxmind.geoip2.model.AbstractResponse;
import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder;

import java.net.InetAddress;
import java.util.Objects;
import java.util.function.Function;

/**
* The in-memory cache for the geoip data. There should only be 1 instance of this class..
* This cache differs from the maxmind's {@link NodeCache} such that this cache stores the deserialized Json objects to avoid the
* cost of deserialization for each lookup (cached or not). This comes at slight expense of higher memory usage, but significant
* reduction of CPU usage.
*/
final class GeoIpCache {
private final Cache<CacheKey, AbstractResponse> cache;

//package private for testing
GeoIpCache(long maxSize) {
if (maxSize < 0) {
throw new IllegalArgumentException("geoip max cache size must be 0 or greater");
}
this.cache = CacheBuilder.<CacheKey, AbstractResponse>builder().setMaximumWeight(maxSize).build();
}

@SuppressWarnings("unchecked")
<T extends AbstractResponse> T putIfAbsent(InetAddress ip,
String databasePath,
Function<InetAddress, AbstractResponse> retrieveFunction) {

//can't use cache.computeIfAbsent due to the elevated permissions for the jackson (run via the cache loader)
CacheKey cacheKey = new CacheKey(ip, databasePath);
//intentionally non-locking for simplicity...it's OK if we re-put the same key/value in the cache during a race condition.
AbstractResponse response = cache.get(cacheKey);
if (response == null) {
response = retrieveFunction.apply(ip);
cache.put(cacheKey, response);
}
return (T) response;
}

//only useful for testing
AbstractResponse get(InetAddress ip, String databasePath) {
CacheKey cacheKey = new CacheKey(ip, databasePath);
return cache.get(cacheKey);
}

/**
* The key to use for the cache. Since this cache can span multiple geoip processors that all use different databases, the database
* path is needed to be included in the cache key. For example, if we only used the IP address as the key the City and ASN the same
* IP may be in both with different values and we need to cache both.
*/
private static class CacheKey {

private final InetAddress ip;
private final String databasePath;

private CacheKey(InetAddress ip, String databasePath) {
this.ip = ip;
this.databasePath = databasePath;
}

//generated
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
CacheKey cacheKey = (CacheKey) o;
return Objects.equals(ip, cacheKey.ip) &&
Objects.equals(databasePath, cacheKey.databasePath);
}

//generated
@Override
public int hashCode() {
return Objects.hash(ip, databasePath);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package org.elasticsearch.ingest.geoip;

import com.maxmind.db.Network;
import com.maxmind.geoip2.exception.AddressNotFoundException;
import com.maxmind.geoip2.model.AsnResponse;
import com.maxmind.geoip2.model.CityResponse;
import com.maxmind.geoip2.model.CountryResponse;
Expand All @@ -19,18 +18,14 @@
import com.maxmind.geoip2.record.Location;
import com.maxmind.geoip2.record.Subdivision;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.geoip.IngestGeoIpPlugin.GeoIpCache;

import java.io.IOException;
import java.net.InetAddress;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -58,7 +53,6 @@ public final class GeoIpProcessor extends AbstractProcessor {
private final DatabaseReaderLazyLoader lazyLoader;
private final Set<Property> properties;
private final boolean ignoreMissing;
private final GeoIpCache cache;
private final boolean firstOnly;

/**
Expand All @@ -70,7 +64,6 @@ public final class GeoIpProcessor extends AbstractProcessor {
* @param targetField the target field
* @param properties the properties; ideally this is lazily-loaded once on first use
* @param ignoreMissing true if documents with a missing value for the field should be ignored
* @param cache a geo-IP cache
* @param firstOnly true if only first result should be returned in case of array
*/
GeoIpProcessor(
Expand All @@ -80,15 +73,13 @@ public final class GeoIpProcessor extends AbstractProcessor {
final String targetField,
final Set<Property> properties,
final boolean ignoreMissing,
final GeoIpCache cache,
boolean firstOnly) {
super(tag, description);
this.field = field;
this.targetField = targetField;
this.lazyLoader = lazyLoader;
this.properties = properties;
this.ignoreMissing = ignoreMissing;
this.cache = cache;
this.firstOnly = firstOnly;
}

Expand Down Expand Up @@ -190,18 +181,7 @@ Set<Property> getProperties() {
}

private Map<String, Object> retrieveCityGeoData(InetAddress ipAddress) {
SpecialPermission.check();
CityResponse response = AccessController.doPrivileged((PrivilegedAction<CityResponse>) () ->
cache.putIfAbsent(ipAddress, CityResponse.class, ip -> {
try {
return lazyLoader.get().city(ip);
} catch (AddressNotFoundException e) {
throw new AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));

CityResponse response = lazyLoader.getCity(ipAddress);
Country country = response.getCountry();
City city = response.getCity();
Location location = response.getLocation();
Expand Down Expand Up @@ -276,18 +256,7 @@ private Map<String, Object> retrieveCityGeoData(InetAddress ipAddress) {
}

private Map<String, Object> retrieveCountryGeoData(InetAddress ipAddress) {
SpecialPermission.check();
CountryResponse response = AccessController.doPrivileged((PrivilegedAction<CountryResponse>) () ->
cache.putIfAbsent(ipAddress, CountryResponse.class, ip -> {
try {
return lazyLoader.get().country(ip);
} catch (AddressNotFoundException e) {
throw new AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));

CountryResponse response = lazyLoader.getCountry(ipAddress);
Country country = response.getCountry();
Continent continent = response.getContinent();

Expand Down Expand Up @@ -321,18 +290,7 @@ private Map<String, Object> retrieveCountryGeoData(InetAddress ipAddress) {
}

private Map<String, Object> retrieveAsnGeoData(InetAddress ipAddress) {
SpecialPermission.check();
AsnResponse response = AccessController.doPrivileged((PrivilegedAction<AsnResponse>) () ->
cache.putIfAbsent(ipAddress, AsnResponse.class, ip -> {
try {
return lazyLoader.get().asn(ip);
} catch (AddressNotFoundException e) {
throw new AddressNotFoundRuntimeException(e);
} catch (Exception e) {
throw new RuntimeException(e);
}
}));

AsnResponse response = lazyLoader.getAsn(ipAddress);
Integer asn = response.getAutonomousSystemNumber();
String organization_name = response.getAutonomousSystemOrganization();
Network network = response.getNetwork();
Expand Down Expand Up @@ -381,11 +339,8 @@ Map<String, DatabaseReaderLazyLoader> databaseReaders() {
return Collections.unmodifiableMap(databaseReaders);
}

private final GeoIpCache cache;

public Factory(Map<String, DatabaseReaderLazyLoader> databaseReaders, GeoIpCache cache) {
public Factory(Map<String, DatabaseReaderLazyLoader> databaseReaders) {
this.databaseReaders = databaseReaders;
this.cache = cache;
}

@Override
Expand Down Expand Up @@ -432,8 +387,7 @@ public GeoIpProcessor create(
}
}

return new GeoIpProcessor(processorTag, description, ipField, lazyLoader, targetField, properties, ignoreMissing, cache,
firstOnly);
return new GeoIpProcessor(processorTag, description, ipField, lazyLoader, targetField, properties, ignoreMissing, firstOnly);
}
}

Expand Down
Loading