Skip to content

Commit

Permalink
Merge remote-tracking branch 'frodrigo/master'
Browse files Browse the repository at this point in the history
* frodrigo/master:
  Use remote data source for analyser_merge_heritage_FR_merimee
  Refactoring of Geocoding with Addok
  Make more and generic analyser_merge_shop_FR #338
  config: Move merge_shop_FR to Vaucluse
  • Loading branch information
jocelynj committed Jun 9, 2019
2 parents 51319b9 + 13e0e04 commit 2d0ad1d
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 46 deletions.
70 changes: 70 additions & 0 deletions analysers/Analyser_Merge_Geocode_Addok_CSV.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-

###########################################################################
## ##
## Copyrights Frédéric Rodrigo 2019 ##
## ##
## This program is free software: you can redistribute it and/or modify ##
## it under the terms of the GNU General Public License as published by ##
## the Free Software Foundation, either version 3 of the License, or ##
## (at your option) any later version. ##
## ##
## This program is distributed in the hope that it will be useful, ##
## but WITHOUT ANY WARRANTY; without even the implied warranty of ##
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##
## GNU General Public License for more details. ##
## ##
## You should have received a copy of the GNU General Public License ##
## along with this program. If not, see <http://www.gnu.org/licenses/>. ##
## ##
###########################################################################

from io import open # In python3 only, this import is not required
from .Analyser_Merge import Source
from .modules import downloader


class Geocode_Addok_CSV(Source):

def __init__(self, source, columns, logger, citycode = None, delimiter = ',', encoding = 'utf-8'):
self.source = source
self.columns = columns
self.citycode = citycode
self.delimiter = delimiter
self.encoding = encoding
self.logger = logger

def __getattr__(self, name):
return getattr(self.source, name)

def open(self):
return open(downloader.update_cache('geocoded://' + self.source.fileUrl, 60, self.fetch))

def fetch(self, url, tmp_file, date_string=None):
service = u'https://api-adresse.data.gouv.fr/search/csv/'
outfile = open(tmp_file, 'w', encoding='utf-8')

content = self.source.open().readlines()
header = content[0:1]
step = 2000
slices = int((len(content)-1) / step) + 1
for i in range(0, slices):
self.logger.log("Geocode slice {0}/{1}".format(i, slices))
slice = ''.join(header + content[1 + step*i : 1 + step*(i+1)])
r = downloader.requests_retry_session().post(url=service, data={
'delimiter': self.delimiter,
'encoding': self.encoding,
'columns': self.columns,
'citycode': self.citycode,
}, files={
'data': slice,
})
r.raise_for_status()
if i == 0:
text = '\n'.join(r.text.split('\n')[0:])
else:
text = '\n'.join(r.text.split('\n')[1:])
writer = outfile.write(text)

return True
16 changes: 12 additions & 4 deletions analysers/analyser_merge_heritage_FR_merimee.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import re
from .Analyser_Merge import Analyser_Merge, Source, CSV, Load, Mapping, Select, Generate
from .Analyser_Merge_Geocode_Addok_CSV import Geocode_Addok_CSV
from functools import reduce


class Analyser_Merge_Heritage_FR_Merimee(Analyser_Merge):
Expand Down Expand Up @@ -56,12 +58,18 @@ def parseDPRO(dpro):
u"Dolmen", u"Hôtel", u"Ancien château", u"Immeuble", u"Eglise", u"Maison"
]

SKIP = [
u"Ile-de-France;Paris;Paris 9e Arrondissement;Immeubles aux abords de l'Opéra (voir aussi : 25, 27, 29, 31 bd Haussmann, Immeuble de la Société Générale);19e siècle;Rohaut de Fleury Charles (architecte);Façades et toitures sur rue des immeubles situés 3, 5, 7 rue Auber, 1 rue Boudreau, 4, 6, 8 boulevard des Capucines, 3, 3bis, 5, 7, 9, 11, 13 rue de la Chaussée-d'Antin, 2, 4, 6, 8, 10, 12, 14, 16 rue Halévy, 1 rue des Mathurins, 1, 2, 3, 4, 5, 7 rue Meyerbeer, 9, 11, 11bis, 15, 17 rue Scribe, 2 rue Auber, 7, place Charles-Garnier : inscription par arrêté du 30 décembre 1977, modifiée par arrêtés des 16 mai 2013 et 14 juin 2013;\"1977/12/30 : inscrit MH ; 2013/05/16 : inscrit MH ; 2013/06/14 : inscrit MH\";propriété privée;\"Auber (rue) 2, 3, 5, 7 ; Boudreau (rue) 1 ; Capucines (boulevard des) 4, 6, 8 ; Charles-Garnier (place) 7 ; Chaussée-d'Antin (rue de la) 3 à 13 ; Halévy (rue) 2 à 16 ; Mathurins (rue des) 1 ; Meyerbeer (rue) 1, 2, 3, 4, 5, 7 ; Scribe (rue) 9, 11, 11bis, 15, 17\";75109;;recensement immeubles MH;PA00088922;48.8768961624, 2.33746024139;75;Ile-de-France;ile de france",
u"Normandie;Calvados;Falaise;Vestiges de l'enceinte fortifiée;\"13e siècle;17e siècle\";;\"Restes de la porte Lecomte : inscription par arrêté du 31 mai 1927 ; Porte des Cordeliers : classement par arrêté du 13 mars 1930 ; Vestiges de l'enceinte fortifiée : de la porte du Château à la porte de Guibray : rue Porte-du-Château 10, 8 (cad. B 67, 68) , rue Blâcher 32, 28, 22, 10, 6 (cad. D 87, 92, 96, 105, 108 à 110). De la porte Guibray à la porte Marescot : rue Amiral-Courbet (cad. D 521, 519, 513, 514, 515, 512, 509). De la porte Marescot à la porte Lecomte : rue Georges-Clémenceau (cad. B 801) , rue Victor-Hugo 15, 17, 19, 21, 23, 25 (cad. B 604, 608, 612, 615) , rue du Sergent-Goubin (cad. B 625). De la porte Lecomte à la route de Caen : rue du Sergent Goubin 24, 22, 20, 2 (cad. B 566, 569, 562, 563, 559, 556, 557, 1058 à 1060) , rue Gambetta 18, 14, 12 (cad. B 1045, 1048, 994, 997). De la route de Caen à la porte Philippe-Jean : rue Frédéric-Gaberon (cad. E 235) , rue des Cordeliers (cad. E 247) , rue du Camp-Ferme (cad. E 354, 364, 365, 370, 383). De la porte Philippe-Jean auchâteau : place Guillaume-le-Conquérant et rue de la Porte-Philippe-Jean (cad. E 585, 572, 578) , place Guillaume-le-Conquérant (cad. E 594, 610, 612, 613) : inscription par arrêté du 19 juin 1951\";\"1927/05/31 : inscrit MH ; 1930/03/13 : classé MH ; 1951/06/19 : inscrit MH\";\"propriété de la commune ; propriété d'une personne privée\";\"Porte-du-Château (rue) ; Blâcher (rue) ; Amiral-Courbet (rue) ; Georges-Clémenceau (rue) ; Victor-Hugo (rue) ; Sergent-Goubin (rue du) ; Gambetta (rue) ; Frédéric-Gaberon (rue) ; Cordeliers (rue des) ; Camp-Ferme (rue du) ; Guillaume-le-Conquérant (place)\";14258;;Recensement immeubles MH;PA00111315;48.8957800281, -0.193401711782;14;Basse-Normandie;basse normandie",
]

Analyser_Merge.__init__(self, config, logger,
u"https://data.culturecommunication.gouv.fr/explore/dataset/liste-des-immeubles-proteges-au-titre-des-monuments-historiques/",
u"https://data.culture.gouv.fr/explore/dataset/liste-des-immeubles-proteges-au-titre-des-monuments-historiques/",
u"Immeubles protégés au titre des Monuments Historiques",
# Original without accurate location, geocoded with https://adresse.data.gouv.fr/csv
CSV(Source(attribution = u"Ministère de la Culture", millesime = "07/2018",
file = "heritage_FR_merimee.csv.bz2"),
CSV(Geocode_Addok_CSV(Source(attribution = u"Ministère de la Culture", millesime = "06/2019",
fileUrl = u"https://data.culture.gouv.fr/explore/dataset/liste-des-immeubles-proteges-au-titre-des-monuments-historiques/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true",
filter = lambda s: reduce(lambda a, v: a.replace(v, ''), SKIP, (u'' + s).encode('utf-8').replace(b'l\u92', b"l'").replace(b'\x85)', b"...)").decode('utf-8', 'ignore'))),
columns = 'Adresse', citycode = 'INSEE', delimiter = u';', logger = logger),
separator = u';'),
Load("longitude", "latitude",
select = {u"Date de Protection": True}),
Expand Down
42 changes: 5 additions & 37 deletions analysers/analyser_merge_power_plant_FR.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import re
from io import open # In python3 only, this import is not required
from .Analyser_Merge import Analyser_Merge, Source, CSV, Load, Mapping, Select, Generate
from .Analyser_Merge_Geocode_Addok_CSV import Geocode_Addok_CSV
from .modules import downloader


Expand All @@ -33,8 +34,9 @@ def __init__(self, config, logger = None):
Analyser_Merge.__init__(self, config, logger,
u"https://opendata.reseaux-energies.fr/explore/dataset/registre-national-installation-production-stockage-electricite-agrege-311217",
u"Registre national des installations de production d'électricité et de stockage",
CSV(Power_Plant_FR_Source(attribution = u"data.gouv.fr:RTE", millesime = "2017",
fileUrl = u"https://opendata.reseaux-energies.fr/explore/dataset/registre-national-installation-production-stockage-electricite-agrege-311217/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true", logger=logger),
CSV(Geocode_Addok_CSV(Source(attribution = u"data.gouv.fr:RTE", millesime = "2017",
fileUrl = u"https://opendata.reseaux-energies.fr/explore/dataset/registre-national-installation-production-stockage-electricite-agrege-311217/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true"),
columns = 'Commune', citycode = 'codeINSEECommune', delimiter = ';', logger = logger),
separator = u";"),
Load("longitude", "latitude",
where = lambda res: res.get('max_puissance') and float(res["max_puissance"]) > 1000),
Expand All @@ -55,7 +57,7 @@ def __init__(self, config, logger = None):
mapping2 = {
"start_date": lambda fields: None if not fields.get(u"dateMiseEnService") else fields[u"dateMiseEnService"][0:4] if fields[u"dateMiseEnService"].endswith('-01-01') or fields[u"dateMiseEnService"].endswith('-12-31') else fields[u"dateMiseEnService"]},
tag_keep_multiple_values = ["voltage"],
text = lambda tags, fields: T_(u"Power plant %s", fields["nomInstallation"]) if fields["nomInstallation"] != 'None' else None)))
text = lambda tags, fields: T_(u"Power plant %s", ', '.join(filter(lambda res: res and res != 'None', [fields["nomInstallation"], fields["Commune"]]))) )))

filiere = {
u"Autre": {
Expand All @@ -81,37 +83,3 @@ def __init__(self, config, logger = None):
u"Fioul": "oil",
u"Gaz": "gaz"},
}


class Power_Plant_FR_Source(Source):
    """Source that geocodes the power-plant register CSV through the addok
    batch API (https://api-adresse.data.gouv.fr), caching the result."""

    def open(self):
        # Cache the geocoded output for 60 days under a distinct
        # "geocoded://" key so it does not collide with the raw download.
        return open(downloader.update_cache('geocoded://' + self.fileUrl, 60, self.fetch))

    def fetch(self, url, tmp_file, date_string=None):
        """Geocode the raw CSV in 2000-row batches and write the merged
        result to tmp_file. Returns True on success; raises on HTTP errors."""
        service = u'https://api-adresse.data.gouv.fr/search/csv/'

        content = Source.open(self).readlines()
        header = content[0:1]
        step = 2000
        slices = int((len(content)-1) / step) + 1
        # `with` guarantees the temp file is closed even if a request fails
        # (the original left it open on exception).
        with open(tmp_file, 'w', encoding='utf-8') as outfile:
            for i in range(0, slices):
                self.logger.log("Geocode slice {0}/{1}".format(i, slices))
                # Bug fixes vs. original:
                #  - readlines() keeps each line's '\n', so join with '' (the
                #    original '\n'.join doubled every line break);
                #  - the slice must end at 1 + step*(i+1) (the original ended
                #    at step*(i+1), silently dropping one record per batch).
                chunk = ''.join(header + content[1 + step*i : 1 + step*(i+1)])
                r = downloader.requests_retry_session().post(url=service, data={
                    'delimiter': ';',
                    'encoding': 'utf-8',
                    'columns': 'Commune',
                    'citycode': 'codeINSEECommune',
                }, files={
                    'data': chunk,
                })
                r.raise_for_status()
                # Keep the header row from the first response only; drop the
                # duplicated header from subsequent responses.
                text = r.text if i == 0 else '\n'.join(r.text.split('\n')[1:])
                outfile.write(text)

        return True
9 changes: 6 additions & 3 deletions analysers/analyser_merge_shop_FR.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ def __init__(self, config, error_file, logger, items, classs, level, title, sele
#self.possible_merge = {"item": items[0][0:-1]+"1", "class": classss+3, "level": level, "tag": ["merge"], "desc": T_(u"%s, integration suggestion", title) }
#self.update_official = {"item": items[0][0:-1]+"2", "class": classss+4, "level": level, "tag": ["merge"], "desc": T_(u"%s update", title) }

dep_code = config.options.get('country').split('-')[1]

SubAnalyser_Merge_Dynamic.__init__(self, config, error_file, logger,
u"http://www.sirene.fr/sirene/public/static/open-data",
u"Sirene",
CSV(Source(attribution = u"INSEE", millesime = "06/2018", gzip = True,
fileUrl = u"http://data.cquest.org/geo_sirene/v2019/last/dep/geo_siret_84.csv.gz")),
fileUrl = u"http://data.cquest.org/geo_sirene/v2019/last/dep/geo_siret_{0}.csv.gz".format(dep_code))),
Load("longitude", "latitude",
select = {"activitePrincipaleEtablissement": classs},
select = {"activitePrincipaleEtablissement": classs, "geo_type": "housenumber", "etatAdministratifEtablissement": "A"},
where = lambda res: float(res["geo_score"]) > 0.9,
uniq = ["siren", "nic"]),
Mapping(
select = Select(
Expand All @@ -78,4 +81,4 @@ def __init__(self, config, error_file, logger, items, classs, level, title, sele
# "-".join([fields["DDEBACT"][0:4], fields["DDEBACT"][4:6], fields["DDEBACT"][6:8]]) if fields["DDEBACT"] != "19000101" else
# "-".join([fields["DCRET"][0:4], fields["DCRET"][4:6], fields["DCRET"][6:8]]) if fields["DCRET"] != "19000101" else
# None},
text = lambda tags, fields: {"en": ', '.join(filter(lambda f: f, [fields["enseigne1Etablissement"] or fields["denominationUsuelleEtablissement"]] + map(lambda k: fields[k], ["numeroVoieEtablissement", "indiceRepetitionEtablissement" ,"typeVoieEtablissement", "libelleVoieEtablissement", "complementAdresseEtablissement", "codePostalEtablissement", "libelleCommuneEtablissement"])))} )))
text = lambda tags, fields: {"en": ', '.join(filter(lambda f: f and f != 'None', [fields["enseigne1Etablissement"] or fields["denominationUsuelleEtablissement"]] + map(lambda k: fields[k], ["numeroVoieEtablissement", "indiceRepetitionEtablissement" ,"typeVoieEtablissement", "libelleVoieEtablissement", "complementAdresseEtablissement", "codePostalEtablissement", "libelleCommuneEtablissement"])))} )))
Binary file removed merge_data/heritage_FR_merimee.csv.bz2
Binary file not shown.
5 changes: 3 additions & 2 deletions osmose_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,9 @@ class gen(default_country):
'merge_street_number_arles',
])
france_departement("provence_alpes_cote_d_azur/var", 7390, "FR-83")
france_departement("provence_alpes_cote_d_azur/vaucluse", 7445, "FR-84")
france_departement("provence_alpes_cote_d_azur/vaucluse", 7445, "FR-84", include=[
'merge_shop_FR',
])

france_departement("rhone_alpes/ain", 7387, "FR-01")
france_departement("rhone_alpes/ardeche", 7430, "FR-07")
Expand Down Expand Up @@ -536,7 +538,6 @@ class gen(default_country):
france_local_db.analyser["merge_power_plant_FR"] = "xxx"
france_local_db.analyser["merge_power_substation_FR"] = "xxx"
france_local_db.analyser["merge_power_tower_FR"] = "xxx"
france_local_db.analyser["merge_shop_FR"] = "xxx"
france_local_db.analyser["merge_restriction_motorway_FR"] = "xxx"
france_local_db.analyser["merge_power_substation_minor_FR"] = "xxx"

Expand Down

0 comments on commit 2d0ad1d

Please sign in to comment.