Skip to content

Commit

Permalink
Add an option to run place-name-resolver without GCS access (#410)
Browse files Browse the repository at this point in the history
  • Loading branch information
pradh authored Mar 23, 2021
1 parent 5b34628 commit 4bccd9f
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 15 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ go 1.14

require (
cloud.google.com/go/storage v1.10.0 // indirect
google.golang.org/grpc v1.29.1 // indirect
googlemaps.github.io/maps v1.2.2 // indirect
)
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121 h1:rITEj+UZHYC927n8GT97eC3zrpzXdb/voyeOuVKS46o=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
10 changes: 10 additions & 0 deletions tools/place_name_resolver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,22 @@ includes local-id references to contained-in places.

For sample input/output CSVs, see the `.csv` files in `place_name_resolver/testdata` directory.

NOTE: If the `--generate_place_id` flag is set, Maps place IDs are returned in
place of DCIDs. This is useful when you cannot access the GCS bucket that stores
the place ID to DCID map.

## Usage

```
go run resolver.go --in_csv_path=testdata/input_basic.csv --out_csv_path=/tmp/output_basic.csv --maps_api_key=<YOUR_KEY_HERE>
```

To generate Maps place IDs instead of DCIDs:

```
go run resolver.go --in_csv_path=testdata/input_basic.csv --out_csv_path=/tmp/output_basic.csv --generate_place_id --maps_api_key=<YOUR_KEY_HERE>
```

## Testing

```
Expand Down
35 changes: 21 additions & 14 deletions tools/place_name_resolver/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ var (
"literal 'Node' to represent a local ID. containedInPlace is always assumed to be a local reference.")
outCsvPath = flag.String("out_csv_path", "", "Same as input with additional column for DCID.")
mapsApiKey = flag.String("maps_api_key", "", "Key for accessing Geocoding Maps API.")
generatePlaceID = flag.Bool("generate_place_id", false, "If set, placeID is generated in output CSV instead of DCID.")
)

const (
Expand Down Expand Up @@ -186,20 +187,19 @@ func buildTableInfo(inCsvPath string) (*tableInfo, error) {
return tinfo, nil
}

func loadPlaceIdToDcidMap(p2d PlaceId2Dcid) (*map[string]string, error) {
func loadPlaceIdToDcidMap(p2d PlaceId2Dcid, placeId2Dcid map[string]string) error {
bytes, err := p2d.Read()
if err != nil {
return nil, err
return err
}
placeId2Dcid := &map[string]string{}
err = json.Unmarshal(bytes, &placeId2Dcid)
if err != nil {
return nil, err
return err
}
return placeId2Dcid, nil
return nil
}

func geocodeOneRow(idx int, placeId2Dcid *map[string]string, tinfo *tableInfo, mapCli MapsClient, wg *sync.WaitGroup) {
func geocodeOneRow(idx int, placeId2Dcid map[string]string, tinfo *tableInfo, mapCli MapsClient, wg *sync.WaitGroup) {
defer wg.Done()
extName := tinfo.extNames[idx]
req := &maps.GeocodingRequest{
Expand All @@ -217,7 +217,11 @@ func geocodeOneRow(idx int, placeId2Dcid *map[string]string, tinfo *tableInfo, m
}
// TODO: Deal with place-type checks and multiple results.
for _, result := range results[:1] {
dcid, ok := (*placeId2Dcid)[result.PlaceID]
if len(placeId2Dcid) == 0 {
tinfo.rows[idx] = append(tinfo.rows[idx], result.PlaceID, "")
continue
}
dcid, ok := placeId2Dcid[result.PlaceID]
if !ok {
tinfo.rows[idx] = append(tinfo.rows[idx], "", fmt.Sprintf("Missing dcid for placeId %s", result.PlaceID))
} else {
Expand All @@ -226,7 +230,7 @@ func geocodeOneRow(idx int, placeId2Dcid *map[string]string, tinfo *tableInfo, m
}
}

func geocodePlaces(mapCli MapsClient, placeId2Dcid *map[string]string, tinfo *tableInfo) error {
func geocodePlaces(mapCli MapsClient, placeId2Dcid map[string]string, tinfo *tableInfo) error {
for i := 0; i < len(tinfo.rows); i += batchSize {
var wg sync.WaitGroup
jMax := i + batchSize
Expand Down Expand Up @@ -263,16 +267,19 @@ func writeOutput(outCsvPath string, tinfo *tableInfo) error {
return nil
}

func resolvePlacesByName(inCsvPath, outCsvPath string, p2d PlaceId2Dcid, mapCli MapsClient) error {
func resolvePlacesByName(inCsvPath, outCsvPath string, generatePlaceID bool, p2d PlaceId2Dcid, mapCli MapsClient) error {
tinfo, err := buildTableInfo(inCsvPath)
if err != nil {
return err
}
placeIdToDcid, err := loadPlaceIdToDcidMap(p2d)
if err != nil {
return err
placeId2Dcid := map[string]string{}
if !generatePlaceID {
err = loadPlaceIdToDcidMap(p2d, placeId2Dcid)
if err != nil {
return err
}
}
err = geocodePlaces(mapCli, placeIdToDcid, tinfo)
err = geocodePlaces(mapCli, placeId2Dcid, tinfo)
if err != nil {
return err
}
Expand All @@ -288,7 +295,7 @@ func main() {
log.Fatalf("Maps API init failed: %v", err)
}

err = resolvePlacesByName(*inCsvPath, *outCsvPath, &RealPlaceId2Dcid{}, &RealMapsClient{Client: mapCli})
err = resolvePlacesByName(*inCsvPath, *outCsvPath, *generatePlaceID, &RealPlaceId2Dcid{}, &RealMapsClient{Client: mapCli})
if err != nil {
log.Fatalf("resolvePlacesByName failed: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion tools/place_name_resolver/resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func TestMain(t *testing.T) {
{"input_containment.csv", "expected_output_containment.csv", "actual_output_containment.csv", getMockGeocodesContainment()},
}
for _, t := range table {
err := resolvePlacesByName("testdata/"+t.in, "testdata/"+t.got, &MockPlaceId2Dcid{}, t.mapCli)
err := resolvePlacesByName("testdata/"+t.in, "testdata/"+t.got, false, &MockPlaceId2Dcid{}, t.mapCli)
if err != nil {
log.Fatal(err)
}
Expand Down

0 comments on commit 4bccd9f

Please sign in to comment.