-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e84d508
commit 52979e5
Showing
7 changed files
with
425 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
################## | ||
# Project Specific | ||
################## | ||
*.pyc | ||
backup/ | ||
data/ | ||
|
||
################### | ||
# Compiled source # | ||
################### | ||
*.com | ||
*.class | ||
*.dll | ||
*.exe | ||
*.o | ||
*.so | ||
|
||
############ | ||
# Packages # | ||
############ | ||
# it's better to unpack these files and commit the raw source | ||
# git has its own built in compression methods | ||
*.7z | ||
*.dmg | ||
*.gz | ||
*.iso | ||
*.jar | ||
*.rar | ||
*.tar | ||
*.zip | ||
|
||
# Logs and databases # | ||
###################### | ||
*.log | ||
*.sql | ||
*.sqlite | ||
|
||
# OS generated files # | ||
###################### | ||
.DS_Store* | ||
ehthumbs.db | ||
Icon? | ||
Thumbs.db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Delete everything under data/. Declared phony because the rule never
# creates a file called "clean-data"; without this, a file of that name
# would make the target look up to date and the recipe would be skipped.
.PHONY: clean-data
clean-data:
	rm -rf data/*
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
AD Andorra | ||
AR Argentina | ||
AS American Samoa | ||
AT Austria | ||
AU Australia | ||
BD Bangladesh | ||
BE Belgium | ||
BG Bulgaria | ||
BR Brazil | ||
CA Canada | ||
CH Switzerland | ||
CZ Czech Republic | ||
DE Germany | ||
DK Denmark | ||
DO Dominican Republic | ||
ES Spain | ||
FI Finland | ||
FO Faroe Islands | ||
FR France | ||
GB Great Britain | ||
GF French Guiana | ||
GG Guernsey | ||
GL Greenland | ||
GP Guadeloupe | ||
GT Guatemala | ||
GU Guam | ||
GY Guyana | ||
HR Croatia | ||
HU Hungary | ||
IM Isle of Man | ||
IN India | ||
IS Iceland | ||
IT Italy | ||
JE Jersey | ||
JP Japan | ||
LI Liechtenstein | ||
LK Sri Lanka | ||
LT Lithuania | ||
LU Luxembourg | ||
MC Monaco | ||
MD Moldavia | ||
MH Marshall Islands | ||
MK Macedonia | ||
MP Northern Mariana Islands | ||
MQ Martinique | ||
MX Mexico | ||
MY Malaysia | ||
NL Holland | ||
NO Norway | ||
NZ New Zealand | ||
PH Philippines | ||
PK Pakistan | ||
PL Poland | ||
PM Saint Pierre and Miquelon | ||
PR Puerto Rico | ||
PT Portugal | ||
RE French Reunion | ||
RU Russia | ||
SE Sweden | ||
SI Slovenia | ||
SJ Svalbard & Jan Mayen Islands | ||
SK Slovak Republic | ||
SM San Marino | ||
TH Thailand | ||
TR Turkey | ||
US United States | ||
VA Vatican | ||
VI Virgin Islands | ||
YT Mayotte | ||
ZA South Africa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
country,post code,place name,state,state abbreviation,ignore1,ignore2,ignore3,ignore4,latitude,longitude |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
from __future__ import with_statement | ||
import codecs | ||
import csv | ||
import sys | ||
import json | ||
import os | ||
|
||
from contextlib import closing | ||
from zipfile import ZipFile, ZIP_DEFLATED | ||
import os | ||
|
||
|
||
def zipdir(basedir, archivename):
    '''
    ZIP Directory Helper Function

    Recursively adds every file found under ``basedir`` to a new
    deflate-compressed archive written to ``archivename``. Each entry is
    stored under its path relative to ``basedir``. Directories that hold no
    files are not recorded, because only files are written to the archive.

    basedir     -- existing directory whose files are archived
    archivename -- path of the zip file to create (opened in "w" mode)

    Raises AssertionError when ``basedir`` is not a directory.
    '''
    # Raised explicitly instead of using ``assert`` so the check still runs
    # under ``python -O`` while callers keep seeing the same exception type.
    if not os.path.isdir(basedir):
        raise AssertionError("not a directory: %r" % (basedir,))

    with closing(ZipFile(archivename, "w", ZIP_DEFLATED)) as z:
        # traverse directory recursively
        for root, dirs, files in os.walk(basedir):
            for fn in files:
                absfn = os.path.join(root, fn)
                # relpath stays correct when basedir ends with a separator;
                # slicing off len(basedir) + len(os.sep) characters would
                # eat the first character of every name in that case.
                zfn = os.path.relpath(absfn, basedir)
                z.write(absfn, zfn)
|
||
def make_zip( countries ):
    '''
    Picks out all the directories and zips them

    For each country code in ``countries`` the directory of that name under
    the current working directory is archived to ``<code>.zip`` (a relative
    path, so the archive lands in the current working directory). Each code
    is echoed to stdout as it is processed, ten to a line.

    countries -- iterable of country-code strings naming the directories
    '''
    print("Zipping folders")

    # The running total comes from enumerate: a hand-incremented counter that
    # is never initialised raises UnboundLocalError on the first country.
    for count, cc in enumerate(countries, 1):
        # make a name for the file
        zipname = cc + ".zip"
        directory = os.path.join(os.getcwd(), cc)

        # Archive this country's directory
        zipdir(directory, zipname)

        # Print 10 to a line
        sys.stdout.write(cc + " ")
        if not count % 10:
            print("")
|
||
|
||
def main():
    '''
    Made specifically for GEONAMES.ORG postal code data parsing

    Usage: <script> <csv-file> <header-file>

    The header file is COMMA delimited; its first row names the columns of
    the TAB delimited data file. For every usable data row a JSON document
    holding the columns whose name does not contain "ignore" is written to
    <cwd>/<lower-cased country code>/<post code>. The file is opened in "w+"
    mode, so a later row with the same country and post code replaces the
    earlier one. Once all rows are processed, every country directory seen
    is zipped with make_zip().

    Rows that are blank, have no country code or have no post code are
    skipped. A row with fewer columns than the header list yields a record
    with only the columns that are present.

    Calls sys.exit(-1) after printing the usage line when fewer than two
    arguments are supplied.
    '''
    if len(sys.argv) < 3:
        print("Usage: " + sys.argv[0] + " <csv-file> <header-file>")
        sys.exit(-1)

    csvfile = sys.argv[1]
    headerfile = sys.argv[2]

    # get the headers, COMMA delimited
    with open(headerfile, 'rb') as hfile:
        headers = next(csv.reader(hfile, delimiter=',', quotechar='|'))

    # Print list of valid header terms
    print([hh for hh in headers if "ignore" not in hh])

    # Keep track of country changes
    countries = set()

    print("Generating Files ... ")

    with open(csvfile, 'rb') as datafile:
        # Read the TAB delimited file
        reader = csv.reader(datafile, delimiter='\t', quotechar='|')

        for row in reader:
            # csv.reader yields [] for a blank line, so guard before indexing
            if len(row) < 2 or row[0] == '':
                continue

            # The post code becomes the file name; an empty one would make
            # the path point at the country directory itself.
            # NOTE(review): the value is used unescaped as a path component;
            # confirm the input can never contain a path separator.
            postcode = row[1]
            if not postcode:
                continue

            cc = row[0].lower()
            output_dir = os.path.join(os.getcwd(), cc)

            # Print if we have moved onto a new country
            if cc not in countries:
                countries.add(cc)
                # The directory only needs checking once per country
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                sys.stdout.write(cc + " ")
                if not len(countries) % 10:
                    print("")

            # Populate information
            index = {}
            for name, value in zip(headers, row):
                if 'ignore' not in name:
                    index[name] = unicode(value, 'utf-8')

            raw = json.dumps(index, ensure_ascii=False)
            target = os.path.join(output_dir, postcode)
            with codecs.open(target, encoding='utf-8', mode="w+") as fout:
                fout.write(raw)

    make_zip(countries)
|
||
|
||
|
||
|
||
# Run the conversion only when executed as a script, so that importing this
# module does not immediately parse sys.argv and start writing files.
if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from __future__ import with_statement | ||
import codecs | ||
import csv | ||
import sys | ||
import json | ||
import os | ||
|
||
import os | ||
|
||
|
||
|
||
|
||
def main():
    '''
    Made specifically for GEONAMES.ORG postal code data parsing

    Usage: <script> <csv-file> <header-file>

    France-only variant: rows whose first column is not "fr" (compared
    case-insensitively) are ignored. The header file is COMMA delimited; its
    first row names the columns of the TAB delimited data file. For each
    French row a JSON document holding the columns whose name does not
    contain "ignore" is written to <cwd>/fr/<post code>, where <post code>
    is the text before the first space in the second column. The file is
    opened in "w+" mode, so a later row with the same post code replaces the
    earlier one.

    Blank rows and rows without a post code are skipped. A row with fewer
    columns than the header list yields a record with only the columns that
    are present.

    Calls sys.exit(-1) after printing the usage line when fewer than two
    arguments are supplied.
    '''
    if len(sys.argv) < 3:
        print("Usage: " + sys.argv[0] + " <csv-file> <header-file>")
        sys.exit(-1)

    csvfile = sys.argv[1]
    headerfile = sys.argv[2]

    # get the headers, COMMA delimited
    with open(headerfile, 'rb') as hfile:
        headers = next(csv.reader(hfile, delimiter=',', quotechar='|'))

    # Print list of valid header terms
    print([hh for hh in headers if "ignore" not in hh])

    print("Generating Files for France ")

    # Every accepted row has country code "fr", so the target directory is
    # the same for all of them; create it lazily on the first French row.
    output_dir = os.path.join(os.getcwd(), 'fr')
    dir_ready = False

    with open(csvfile, 'rb') as datafile:
        # Read the TAB delimited file
        reader = csv.reader(datafile, delimiter='\t', quotechar='|')

        for row in reader:
            # csv.reader yields [] for a blank line, so guard before indexing
            if len(row) < 2 or row[0].lower() != 'fr':
                continue

            # Keep only the part of the post code before the first space; an
            # empty result would make the path point at the directory itself.
            # NOTE(review): the value is used unescaped as a path component;
            # confirm the input can never contain a path separator.
            postcode = row[1].split(' ')[0]
            if not postcode:
                continue

            if not dir_ready:
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                dir_ready = True

            # Populate information
            index = {}
            for name, value in zip(headers, row):
                if 'ignore' not in name:
                    index[name] = unicode(value, 'utf-8')

            raw = json.dumps(index, ensure_ascii=False)
            target = os.path.join(output_dir, postcode)
            with codecs.open(target, encoding='utf-8', mode="w+") as fout:
                fout.write(raw)
|
||
|
||
|
||
|
||
|
||
# Run the conversion only when executed as a script, so that importing this
# module does not immediately parse sys.argv and start writing files.
if __name__ == "__main__":
    main()
|
Oops, something went wrong.