Merge remote-tracking branch 'frodrigo/master'
* frodrigo/master:
  Fix config of country in Name_UpperCase
  Ignore camp_type/lamp_type in analyser_osmosis_tag_typo #534
  Ignore Inventaire in analyser_merge_heritage_FR_merimee #517
  Better select in analyser_merge_shop_FR & move mapping to json #338
  Fuzzy geocoding results at municipality level in analyser_merge_power_plant_FR #510
  Fix bbox size in Analyser_Merge #523
  Fix logger param in Analyser_Merge / Analyser_Merge_Mapillary
jocelynj committed Jun 17, 2019
2 parents ee3bd19 + a53d37c commit d27bce5
Showing 9 changed files with 990 additions and 93 deletions.
21 changes: 12 additions & 9 deletions analysers/Analyser_Merge.py
@@ -279,7 +279,7 @@
"""

class Source:
-def __init__(self, attribution = None, millesime = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, gzip = False, filter = None, logger = None):
+def __init__(self, attribution = None, millesime = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, gzip = False, filter = None):
"""
Describe the source file.
@param encoding: file charset encoding
@@ -289,7 +289,6 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
@param zip: extract file from zip
@param gzip: uncompress as gzip
@param filter: lambda expression applied on text file before loading
-@param logger: a logger
"""
self.attribution = attribution
self.millesime = millesime
@@ -300,7 +299,6 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
self.zip = zip
self.gzip = gzip
self.filter = filter
-self.logger = logger

if self.file:
if not os.path.isabs(self.file):
@@ -542,7 +540,7 @@ def __init__(self, source):

class Load(object):
def __init__(self, x = ("NULL",), y = ("NULL",), srid = 4326, table_name = None, create = None,
-    select = {}, uniq = None, where = lambda res: True, xFunction = lambda i: i, yFunction = lambda i: i):
+    select = {}, uniq = None, where = lambda res: True, map = lambda i: i, xFunction = lambda i: i, yFunction = lambda i: i):
"""
Describe the conversion of the data set loaded into the database with COPY into another table more usable for processing.
@param x: the name of the x column, used as or converted to longitude; can be a SQL expression formatted as ("SQL CODE",)
@@ -553,6 +551,7 @@ def __init__(self, x = ("NULL",), y = ("NULL",), srid = 4326, table_name = None,
@param select: dict reformatted as SQL to filter rows on import, before conversion; prefer this over the where param
@param uniq: select distinct by column list
@param where: lambda expression taking the row as a dict and returning a boolean that decides whether the row is inserted into the table
+@param map: lambda expression returning a replacement record
@param xFunction: lambda expression to convert the x column content before reprojection, identity by default
@param yFunction: lambda expression to convert the y column content before reprojection, identity by default
"""
@@ -564,6 +563,7 @@ def __init__(self, x = ("NULL",), y = ("NULL",), srid = 4326, table_name = None,
self.select = select
self.uniq = uniq
self.where = where
+self.map = map
self.xFunction = xFunction
self.yFunction = yFunction

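The new map hook gives Load a per-record rewrite step: where filters first, then map returns a replacement record, and only then do xFunction/yFunction run. A minimal usage sketch with hypothetical field names, assuming the loader exposes the raw coordinate columns as '_x'/'_y' (as insertOfficial below and the power_plant analyser rely on):

from analysers.Analyser_Merge import Load

load = Load("longitude", "latitude",
    # 1. "where" filters rows first ("status" is a hypothetical field)
    where = lambda res: res.get("status") == "open",
    # 2. "map" then returns a replacement record; here it rounds the
    #    coordinate columns exposed as "_x"/"_y"
    map = lambda res: dict(res, **{
        "_x": round(float(res["_x"]), 5),
        "_y": round(float(res["_y"]), 5),
    }),
    # 3. xFunction/yFunction finally convert the (mapped) values
    xFunction = lambda x: x and float(x),
    yFunction = lambda y: y and float(y))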
@@ -644,9 +644,12 @@ def setData(res):
giscurs_getpoint = osmosis.gisconn.cursor(cursor_factory=psycopg2.extras.DictCursor)
mult_space = re.compile(r'\s+')
def insertOfficial(res):
-x = self.xFunction(res[0])
-y = self.yFunction(res[1])
-if (not self.pip or (x and y)) and self.where(res):
+if not self.where(res):
+    return
+res = self.map(res)
+x = self.xFunction(res['_x'])
+y = self.yFunction(res['_y'])
+if not self.pip or (x and y):
is_pip = False
if self.pip:
giscurs_getpoint.execute("SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), %(SRID)s), 4326))" % {"x": x, "y": y, "SRID": self.srid})
@@ -875,9 +878,9 @@ def analyser_osmosis_common(self):
LEFT JOIN LATERAL regexp_split_to_table(tags->'%(ref)s', ';') a(ref) ON true
WHERE""" + ("""
%(geomSelect)s IS NOT NULL AND""" if self.load.srid else "") + ("""
-ST_SetSRID(ST_GeomFromText('%(bbox)s'), 4326) && %(geomSelect)s AND""" if self.load.bbox and self.load.srid else "") + """
+ST_SetSRID(ST_Expand(ST_GeomFromText('%(bbox)s'), %(distance)s), 4326) && %(geomSelect)s AND""" if self.load.bbox and self.load.srid else "") + """
tags != ''::hstore AND
-%(where)s)""") % {"type":type[0].upper(), "ref":self.mapping.osmRef, "geomSelect":typeSelect[type[0].upper()], "geom":typeGeom[type[0].upper()], "shape":typeShape[type[0].upper()], "from":type, "bbox":self.load.bbox, "where":where},
+%(where)s)""") % {"type":type[0].upper(), "ref":self.mapping.osmRef, "geomSelect":typeSelect[type[0].upper()], "geom":typeGeom[type[0].upper()], "shape":typeShape[type[0].upper()], "from":type, "bbox":self.load.bbox, "distance": self.mapping.conflationDistance, "where":where},
self.mapping.select.types
)
))
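The bbox fix above pairs the envelope test with ST_Expand: instead of intersecting OSM geometries with the raw data-set envelope, the envelope is grown by the conflation distance first, so candidates just outside it can still be matched. A hedged sketch of the clause the template renders, with placeholder values (the distance is interpreted in the bbox's coordinate units):

clause = ("ST_SetSRID(ST_Expand(ST_GeomFromText('%(bbox)s'), %(distance)s), 4326)"
          " && %(geomSelect)s") % {
    "bbox": "POLYGON((2 48, 3 48, 3 49, 2 49, 2 48))",  # placeholder envelope (WKT)
    "distance": 0.01,  # stands in for self.mapping.conflationDistance
    "geomSelect": "ways.linestring",  # placeholder geometry expression
}
print(clause)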
3 changes: 2 additions & 1 deletion analysers/Analyser_Merge_Mapillary.py
@@ -34,10 +34,11 @@


class Source_Mapillary(Source):
-def __init__(self, country, polygon_id, mapping, layer, **args):
+def __init__(self, country, polygon_id, mapping, layer, logger, **args):
self.polygon_id = polygon_id
self.mapping = mapping
self.layer = layer
+self.logger = logger
Source.__init__(self, **args)
self.fileUrl = u'mapillary-feature-{0}-{1}.csv'.format(country, SourceVersion.version(self.mapping))
self.fileUrlCache = 120
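With logger dropped from the generic Source (see Analyser_Merge.py above), Source_Mapillary now accepts and stores it explicitly. A construction sketch with made-up values; only the parameter routing is the point, and the remaining keyword arguments still fall through **args to Source.__init__:

from analysers.Analyser_Merge_Mapillary import Source_Mapillary  # within the osmose-backend tree

logger = None  # stand-in logger object
source = Source_Mapillary(
    country = "FR",
    polygon_id = 123456,                       # hypothetical boundary id
    mapping = "mapillary_traffic_signs.json",  # hypothetical mapping file
    layer = "trafficsigns",                    # hypothetical layer name
    logger = logger,                           # consumed here, no longer by Source
    attribution = "Mapillary")                 # forwarded to Source via **args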
2 changes: 1 addition & 1 deletion analysers/analyser_merge_heritage_FR_merimee.py
@@ -79,7 +79,7 @@ def parseDPRO(dpro):
tags = {
# "heritage": ["1", "2", "3"],
"heritage:operator": None,
"ref:mhs": lambda t: "{0} NOT LIKE 'PM%'".format(t)}), # Not a Palissy ref
"ref:mhs": lambda t: "{0} NOT LIKE 'PM%' AND {0} NOT LIKE 'IA%'".format(t)}), # Not a Palissy ref nor "Inventaire général du patrimoine culturel" ref
osmRef = "ref:mhs",
conflationDistance = 1000,
generate = Generate(
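The tightened select now excludes both Palissy ("PM…") and "Inventaire général du patrimoine culturel" ("IA…") references. The lambda emits a SQL fragment over whatever column expression the merge framework substitutes in; a rough illustration with a hypothetical column expression:

ref_filter = lambda t: "{0} NOT LIKE 'PM%' AND {0} NOT LIKE 'IA%'".format(t)
print(ref_filter("tags->'ref:mhs'"))  # hypothetical column expression
# tags->'ref:mhs' NOT LIKE 'PM%' AND tags->'ref:mhs' NOT LIKE 'IA%'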
4 changes: 3 additions & 1 deletion analysers/analyser_merge_power_plant_FR.py
@@ -25,6 +25,7 @@
from .Analyser_Merge import Analyser_Merge, Source, CSV, Load, Mapping, Select, Generate
from .Analyser_Merge_Geocode_Addok_CSV import Geocode_Addok_CSV
from .modules import downloader
+from .modules import Stablehash


class Analyser_Merge_Power_Plant_FR(Analyser_Merge):
@@ -39,7 +40,8 @@ def __init__(self, config, logger = None):
columns = 'Commune', citycode = 'codeINSEECommune', delimiter = ';', logger = logger),
separator = u";"),
Load("longitude", "latitude",
-            where = lambda res: res.get('max_puissance') and float(res["max_puissance"]) > 1000),
+            where = lambda res: res.get('max_puissance') and float(res["max_puissance"]) > 1000,
+            map = lambda res: dict(res, **{"_x": float(res["_x"]) + (Stablehash.stablehash(str(res)) % 200 - 100) * 0.00001, "_y": float(res["_y"]) + (Stablehash.stablehash(str(res)) % 212 - 106) * 0.00001})),
Mapping(
select = Select(
types = ["ways", "relations"],
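These plants are geocoded only to municipality level, so every record in a commune would otherwise land on the exact same point; the new map spreads them deterministically by at most roughly ±0.001° (about 100 m) using a stable hash of the record. A standalone sketch of the arithmetic, with zlib.crc32 as a stand-in for the project's Stablehash.stablehash:

import zlib

def jitter(res):
    # stand-in for Stablehash.stablehash: any stable non-negative int works
    h = zlib.crc32(str(sorted(res.items())).encode("utf-8"))
    return dict(res, **{
        # offsets span (-100..99)*1e-5 and (-106..105)*1e-5 degrees,
        # deterministic per record, so reruns flag the same position
        "_x": float(res["_x"]) + (h % 200 - 100) * 0.00001,
        "_y": float(res["_y"]) + (h % 212 - 106) * 0.00001,
    })

print(jitter({"_x": "2.35", "_y": "48.85", "nom": "Centrale A"}))  # made-up record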
30 changes: 12 additions & 18 deletions analysers/analyser_merge_shop_FR.py
@@ -22,6 +22,7 @@

from io import open # In python3 only, this import is not required
from backports import csv # In python3 only just "import csv"
+import json
from .Analyser_Merge_Dynamic import Analyser_Merge_Dynamic, SubAnalyser_Merge_Dynamic
from .Analyser_Merge import Source, CSV, Load, Mapping, Select, Generate
from time import gmtime, strftime
@@ -32,33 +33,26 @@ class Analyser_Merge_Shop_FR(Analyser_Merge_Dynamic):
def __init__(self, config, logger = None):
Analyser_Merge_Dynamic.__init__(self, config, logger)

with open("merge_data/shop_FR.mapping.csv") as mappingfile:
spamreader = csv.reader(mappingfile)
for row in spamreader:
if row[0][0] == '#':
continue
items, classs, level, title = row[0:4]
items = items.split('|')
level = int(level)
osmTags = filter(lambda a: a, map(lambda t: (t.split('=') + [None])[0:2] if t else None, row[4:]))
if len(osmTags) > 0:
self.classFactory(SubAnalyser_Merge_Shop_FR, classs.replace('.', ''), items, classs, level, title, dict(osmTags), dict(filter(lambda a: a[1], osmTags)))
mapping = 'merge_data/shop_FR.mapping.json'
mapingfile = json.loads(open(mapping).read())
for r in mapingfile:
self.classFactory(SubAnalyser_Merge_Shop_FR, r['class'].replace('.', ''), r['missing_official'], r['missing_osm'], r['class'], r['level'], r['title'], r['match'], r['generate'])

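The per-class CSV mapping moves to JSON; judging from the keys read in the loop above, each entry carries the class code, the two item numbers, the level, a title, and the match/generate tag sets. An assumed example entry (illustrative values, not taken from the real file):

example_entry = {
    "class": "10.71C",           # NAF-style activity code; dots stripped for the class name
    "missing_official": 8250,    # item number for "not integrated" issues
    "missing_osm": 8251,         # item number for the (commented-out) reverse check
    "level": 3,
    "title": "Boulangerie",
    "match": {"shop": "bakery"},     # selectTags: what an OSM match must carry
    "generate": {"shop": "bakery"},  # generateTags: tags proposed on integration
}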

class SubAnalyser_Merge_Shop_FR(SubAnalyser_Merge_Dynamic):
-def __init__(self, config, error_file, logger, items, classs, level, title, selectTags, generateTags):
+def __init__(self, config, error_file, logger, missing_official, missing_osm, classs, level, title, selectTags, generateTags):
classss = int(classs.replace('.', '0')[:-1]) * 100 + ord(classs[-1]) - 65
self.missing_official = {"item": items[0], "class": classss+1, "level": level, "tag": ["merge"], "desc": T_(u"%s not integrated", title) }
#self.missing_osm = {"item": items[1], "class": classss+2, "level": level, "tag": ["merge"], "desc": T_f(u"{0} without tag \"{1}\" or invalid", title, 'ref:FR:SIRET') }
#self.possible_merge = {"item": items[0][0:-1]+"1", "class": classss+3, "level": level, "tag": ["merge"], "desc": T_(u"%s, integration suggestion", title) }
#self.update_official = {"item": items[0][0:-1]+"2", "class": classss+4, "level": level, "tag": ["merge"], "desc": T_(u"%s update", title) }
self.missing_official = {"item": missing_official, "class": classss+1, "level": level, "tag": ["merge"], "desc": T_(u"%s not integrated", title) }
#self.missing_osm = {"item": missing_osm, "class": classss+2, "level": level, "tag": ["merge"], "desc": T_f(u"{0} without tag \"{1}\" or invalid", title, 'ref:FR:SIRET') }
#self.possible_merge = {"item": missing_official[0:-1]+"1", "class": classss+3, "level": level, "tag": ["merge"], "desc": T_(u"%s, integration suggestion", title) }
#self.update_official = {"item": missing_official[0:-1]+"2", "class": classss+4, "level": level, "tag": ["merge"], "desc": T_(u"%s update", title) }

dep_code = config.options.get('country').split('-')[1]

SubAnalyser_Merge_Dynamic.__init__(self, config, error_file, logger,
u"http://www.sirene.fr/sirene/public/static/open-data",
u"Sirene",
-CSV(Source(attribution = u"INSEE", millesime = "06/2018", gzip = True,
+CSV(Source(attribution = u"INSEE", millesime = "06/2019", gzip = True,
fileUrl = u"http://data.cquest.org/geo_sirene/v2019/last/dep/geo_siret_{0}.csv.gz".format(dep_code))),
Load("longitude", "latitude",
select = {"activitePrincipaleEtablissement": classs, "geo_type": "housenumber", "etatAdministratifEtablissement": "A"},
Expand All @@ -81,4 +75,4 @@ def __init__(self, config, error_file, logger, items, classs, level, title, sele
# "-".join([fields["DDEBACT"][0:4], fields["DDEBACT"][4:6], fields["DDEBACT"][6:8]]) if fields["DDEBACT"] != "19000101" else
# "-".join([fields["DCRET"][0:4], fields["DCRET"][4:6], fields["DCRET"][6:8]]) if fields["DCRET"] != "19000101" else
# None},
-text = lambda tags, fields: {"en": ', '.join(filter(lambda f: f and f != 'None', [fields["enseigne1Etablissement"] or fields["denominationUsuelleEtablissement"]] + map(lambda k: fields[k], ["numeroVoieEtablissement", "indiceRepetitionEtablissement" ,"typeVoieEtablissement", "libelleVoieEtablissement", "complementAdresseEtablissement", "codePostalEtablissement", "libelleCommuneEtablissement"])))} )))
+text = lambda tags, fields: {"en": ', '.join(filter(lambda f: f and f != 'None', [fields["enseigne1Etablissement"] or fields["denominationUsuelleEtablissement"]] + list(map(lambda k: fields[k], ["numeroVoieEtablissement", "indiceRepetitionEtablissement" ,"typeVoieEtablissement", "libelleVoieEtablissement", "complementAdresseEtablissement", "codePostalEtablissement", "libelleCommuneEtablissement"]))))} )))
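The list(map(...)) wrapper is the Python 3 fix here: map() returns a lazy iterator under Python 3, and concatenating a list with an iterator via + raises TypeError. A minimal reproduction:

fields = ["12", "rue de la Paix"]  # made-up address fragments
extra = map(str.strip, [" 75002 ", " Paris "])
# fields + extra  # TypeError under Python 3: can only concatenate list to list
combined = fields + list(extra)
print(combined)  # ['12', 'rue de la Paix', '75002', 'Paris']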
1 change: 1 addition & 0 deletions analysers/analyser_osmosis_tag_typo.py
@@ -70,6 +70,7 @@
'addr2', 'addr3',
'kerb', 'kern',
'lock_name', 'loc_name',
+'camp_type', 'lamp_type',
'name_1', 'name_2', 'name_3', 'name_4', 'name_5', 'name_6', 'name_7', 'name_8', 'name_9' -- Tiger mess
) AND
NOT key LIKE 'AND_%'
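camp_type/lamp_type join this whitelist because the typo analyser flags pairs of keys that differ by roughly one edit, and both of these are legitimate keys. The real comparison happens in SQL; this small Python check just shows why the pair trips it:

def one_substitution_apart(a, b):
    # true when the strings have equal length and differ in exactly one position
    return len(a) == len(b) and sum(x != y for x, y in zip(a, b)) == 1

assert one_substitution_apart('camp_type', 'lamp_type')
assert one_substitution_apart('kerb', 'kern')  # another whitelisted pair above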
61 changes: 0 additions & 61 deletions merge_data/shop_FR.mapping.csv

This file was deleted.

(2 more changed files not shown)
