-
Notifications
You must be signed in to change notification settings - Fork 2
/
pleiades-tgn.rb
executable file
·113 lines (96 loc) · 3.52 KB
/
pleiades-tgn.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env ruby
Encoding.default_external = Encoding::UTF_8
Encoding.default_internal = Encoding::UTF_8
require 'json'
require 'csv'
# Positional command-line arguments, in order: three TGN N-Triples files
# (labels, parents, geometries) followed by the Pleiades places and names CSVs.
# Arguments that are not supplied are left as nil.
tgn_labels_nt, tgn_parents_nt, tgn_geometries_nt, places_csv, names_csv = ARGV.values_at(0, 1, 2, 3, 4)

# Largest haversine_distance result (kilometres) still accepted as a match.
distance_threshold = 8.0

# places:         Pleiades place id => CSV row of that place, as a Hash
# pleiades_names: name string       => list of Pleiades place ids using that name
places, pleiades_names = {}, {}
# Great-circle distance between two points on a sphere, using the haversine
# formula with a sphere radius of 6371 km.
#
# @param lat1 [Numeric] latitude of the first point, in degrees
# @param lon1 [Numeric] longitude of the first point, in degrees
# @param lat2 [Numeric] latitude of the second point, in degrees
# @param lon2 [Numeric] longitude of the second point, in degrees
# @return [Float] distance between the two points, in kilometres
def haversine_distance(lat1, lon1, lat2, lon2)
  earth_radius_km = 6371

  d_lat = (lat2 - lat1) * Math::PI / 180
  d_lon = (lon2 - lon1) * Math::PI / 180
  lat1_rad = lat1 * Math::PI / 180
  lat2_rad = lat2 * Math::PI / 180

  a = Math.sin(d_lat / 2) * Math.sin(d_lat / 2) +
      Math.sin(d_lon / 2) * Math.sin(d_lon / 2) * Math.cos(lat1_rad) * Math.cos(lat2_rad)

  # Mathematically 0 <= a <= 1, but floating-point rounding can push it a hair
  # outside that range for (near-)antipodal points, and Math.sqrt raises
  # Math::DomainError on a negative argument. Clamp before taking roots.
  a = [[a, 0.0].max, 1.0].min

  central_angle = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a))
  earth_radius_km * central_angle
end
# Load every Pleiades place row, keyed by the value of its "id" column.
$stderr.puts "Parsing Pleiades places..."
CSV.foreach(places_csv, headers: true) { |place_row| places[place_row["id"]] = place_row.to_hash }
$stderr.puts places.size

# Load Pleiades names. Each name row is attached to its place (when that place
# was loaded above), and every name variant on the row is indexed so it can be
# looked up by string later.
$stderr.puts "Parsing Pleiades names..."
CSV.foreach(names_csv, headers: true) do |name_row|
  # The place id is taken as the third '/'-separated segment of "pid".
  pid = name_row["pid"].split('/')[2]

  place = places[pid]
  (place["names"] ||= []) << name_row.to_hash unless place.nil?

  %w[title nameAttested nameTransliterated].each do |column|
    variant = name_row[column]
    ids_for_variant = (pleiades_names[variant] ||= [])
    # Keep each id at most once per name, and never record a nil id.
    ids_for_variant << pid unless pid.nil? || ids_for_variant.include?(pid)
  end
end
$stderr.puts pleiades_names.size
# Parse the TGN labels N-Triples file.
#
# tgn_labels:      label string => list of TGN subjects carrying that label
# tgn_first_label: TGN subject  => first label seen for that subject
tgn_labels = {}
tgn_first_label = {}
$stderr.puts "Parsing TGN labels..."
# File.foreach closes the file once iteration finishes.
File.foreach(tgn_labels_nt) do |line|
  # Split into at most three parts so the object keeps its internal spaces.
  # A plain split(' ') would cut a literal such as "New York"@en down to
  # `"New`, which then fails the quote match below and drops the label.
  subject, _predicate, object = line.split(' ', 3)
  next if object.nil? # blank or truncated line

  # Text between the first and the last double quote of the object.
  tgn_toponym = object[/"(.+)"/, 1]
  next if tgn_toponym.nil? # object is not a quoted literal

  # Decode \uXXXX escapes (four hex digits) into UTF-8 characters.
  tgn_toponym = tgn_toponym.gsub(/\\u([0-9A-Fa-f]{4})/) { [$1.to_i(16)].pack('U') }

  (tgn_labels[tgn_toponym] ||= []) << subject
  tgn_first_label[subject] ||= tgn_toponym
end
$stderr.puts tgn_labels.keys.length
# Parse the TGN parents N-Triples file.
#
# tgn_parents: TGN subject => object of the triple, kept verbatim (including
# any surrounding quotes) apart from \uXXXX decoding. A later line for the same
# subject overwrites an earlier one.
tgn_parents = {}
$stderr.puts "Parsing TGN parents..."
# File.foreach closes the file once iteration finishes.
File.foreach(tgn_parents_nt) do |line|
  tokens = line.split(' ')
  # Blank or truncated lines have no object; indexing them would yield nil.
  next if tokens.length < 3

  subject = tokens[0]
  # Everything after the predicate except the final token (the terminating
  # '.'), re-joined with single spaces.
  tgn_parent = tokens[2..-2].join(' ')

  # Decode \uXXXX escapes (four hex digits) into UTF-8 characters.
  tgn_parent.gsub!(/\\u([0-9A-Fa-f]{4})/) { [$1.to_i(16)].pack('U') }
  tgn_parents[subject] = tgn_parent
end
$stderr.puts tgn_parents.keys.length
# Parse the TGN geometries N-Triples file.
#
# tgn_geometries: geometry subject => { :latitude => Float, :longitude => Float }
# An entry is created for every subject seen, so it may hold only one of the
# two keys, or neither, if the file has no matching triple for it.
tgn_geometries = {}
$stderr.puts "Parsing TGN geometries..."
# File.foreach closes the file once iteration finishes.
File.foreach(tgn_geometries_nt) do |line|
  subject, predicate, object = line.split(' ')
  tgn_geometries[subject] ||= {}

  # The coordinate is the quoted part of the object, converted with to_f.
  case predicate
  when '<http://schema.org/latitude>'
    tgn_geometries[subject][:latitude] = object[/"(.+)"/, 1].to_f
  when '<http://schema.org/longitude>'
    tgn_geometries[subject][:longitude] = object[/"(.+)"/, 1].to_f
  end
end
$stderr.puts tgn_geometries.keys.length
# Report every (TGN place, Pleiades place) pair that shares a name string and
# lies within distance_threshold of each other. One comma-joined line per match
# is written to stdout: TGN URI, Pleiades URI, first TGN label, TGN parent.
$stderr.puts "Checking matches..."
pleiades_names.each do |pleiades_name, pleiades_ids|
  next if pleiades_name.nil? || pleiades_name.empty?
  next unless tgn_labels.has_key?(pleiades_name)

  tgn_labels[pleiades_name].each do |tgn_id|
    # The geometry subject is the TGN subject with "-geometry" inserted before
    # the closing '>'.
    geometry = tgn_geometries[tgn_id.sub('>', '-geometry>')]
    # Geometry entries may exist without both coordinates; a nil coordinate
    # would raise inside haversine_distance, so skip those.
    next if geometry.nil? || geometry[:latitude].nil? || geometry[:longitude].nil?

    pleiades_ids.each do |pleiades_id|
      place = places[pleiades_id]
      next if place.nil?
      # A blank reprLat/reprLong would become 0.0 via to_f and be compared as
      # if the place were located at (0, 0); skip places with no coordinates.
      next if place["reprLat"].to_s.empty? || place["reprLong"].to_s.empty?

      distance = haversine_distance(geometry[:latitude], geometry[:longitude],
                                    place["reprLat"].to_f, place["reprLong"].to_f)
      if distance <= distance_threshold
        puts [tgn_id.tr('<>', ''), "http://pleiades.stoa.org/places/#{pleiades_id}", tgn_first_label[tgn_id], tgn_parents[tgn_id]].join(',')
      end
    end
  end
end