Add parsing of version and user data #7

Open · wants to merge 3 commits into master
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,2 +1,7 @@
*.pyc
*.egg-info

build/
imposm/parser/pbf/OSMPBF.so
imposm/parser/pbf/osm.pb.cc
imposm/parser/pbf/osm.pb.h
5 changes: 4 additions & 1 deletion imposm/parser/pbf/multiproc.py
@@ -42,13 +42,16 @@ class PBFMultiProcParser(object):
relations_tag_filter = None

def __init__(self, pool_size, nodes_queue=None, ways_queue=None,
relations_queue=None, coords_queue=None, marshal_elem_data=False):
relations_queue=None, coords_queue=None, marshal_elem_data=False,
with_metadata=False):
self.pool_size = pool_size
self.nodes_callback = nodes_queue.put if nodes_queue else None
self.ways_callback = ways_queue.put if ways_queue else None
self.relations_callback = relations_queue.put if relations_queue else None
self.coords_callback = coords_queue.put if coords_queue else None
self.marshal = marshal_elem_data
self.with_metadata = with_metadata

def parse(self, filename):
pos_queue = multiprocessing.JoinableQueue(32)
pool = []
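For illustration, a minimal, hypothetical sketch of how the extended constructor might be called once this change is merged (queue consumption and worker handling are omitted; the file path is a placeholder and not part of this diff):

    # Sketch only: exercises the new with_metadata keyword from this PR.
    import multiprocessing
    from imposm.parser.pbf.multiproc import PBFMultiProcParser

    nodes_queue = multiprocessing.JoinableQueue()
    parser = PBFMultiProcParser(
        pool_size=2,
        nodes_queue=nodes_queue,
        with_metadata=True,  # new keyword added by this PR
    )
    # parser.parse('extract.osm.pbf')  # placeholder path; blocks until the workers finish
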
96 changes: 51 additions & 45 deletions imposm/parser/pbf/parser.py
@@ -1,11 +1,11 @@
# Copyright 2011 Omniscale GmbH & Co. KG
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -35,20 +35,22 @@ class PBFParser(object):
"""
OSM PBF parser.

:param xxx_callback:
:param xxx_callback:
callback functions for coords, nodes, ways and relations.
Each callback function gets called with a list of multiple elements.

:param xxx_filter:
functions that can manipulate the tag dictionary.
Nodes and relations without tags will not be passed to the callback.

:param marshal:
return the data as a marshaled string
"""
def __init__(self, nodes_callback=None, ways_callback=None,
relations_callback=None, coords_callback=None, nodes_tag_filter=None,
ways_tag_filter=None, relations_tag_filter=None, marshal=False):
relations_callback=None, coords_callback=None,
nodes_tag_filter=None, ways_tag_filter=None,
relations_tag_filter=None, marshal=False,
with_metadata=False):
self.nodes_callback = nodes_callback
self.ways_callback = ways_callback
self.relations_callback = relations_callback
@@ -57,24 +59,28 @@ def __init__(self, nodes_callback=None, ways_callback=None,
self.ways_tag_filter = ways_tag_filter
self.relations_tag_filter = relations_tag_filter
self.marshal = marshal

self.with_metadata = with_metadata
if self.with_metadata:
raise Exception("PBFParser doesn't support parsing of metadata. "
"You set with_metadata on True")

def parse(self, filename, offset, size):
"""
Parse primitive block from `filename`.

:param filename: path to PBF file
:param offset: byte offset of the primitive block to parse
:param size: size in bytes of the primitive block to parse
"""
reader = PrimitiveBlockParser(filename, offset, size)

if self.nodes_callback or self.coords_callback:
self.handle_nodes(reader)
if self.ways_callback:
self.handle_ways(reader)
if self.relations_callback:
self.handle_relations(reader)

def handle_nodes(self, reader):
nodes = []
coords = []
@@ -142,14 +148,14 @@ def decoded_stringtable(stringtable):
class PrimitiveBlockParser(object):
"""
Low level PBF primitive block parser.

Parses a single primitive block and handles OSM PBF internals like
dense nodes, delta encoding, stringtables, etc.

:param filename: path to PBF file
:param offset: byte offset of the primitive block to parse
:param size: size in bytes of the primitive block to parse

"""
def __init__(self, filename, blob_pos, blob_size):
self.pos = filename, blob_pos, blob_size
@@ -158,17 +164,17 @@ def __init__(self, filename, blob_pos, blob_size):
self.primitive_block.ParseFromString(data)
self.primitivegroup = self.primitive_block.primitivegroup
self.stringtable = decoded_stringtable(self.primitive_block.stringtable.s)

def __repr__(self):
return '<PrimitiveBlockParser %r>' % (self.pos, )

def _get_tags(self, element, pos):
tags = {}
key = None
value = None
keyflag = False
if pos >= len(element):
return {}, pos
return {}, pos
while True:
key_val = element[pos]
pos += 1
@@ -182,11 +188,11 @@ def _get_tags(self, element, pos):
tags[self.stringtable[key]] = self.stringtable[value]
keyflag = False
return tags, pos

def nodes(self):
"""
Return an iterator for all *nodes* in this primitive block.

:rtype: iterator of ``(osm_id, tags, (lon, lat))`` tuples
"""
for group in self.primitivegroup:
@@ -216,11 +222,11 @@ def nodes(self):
for i in xrange(len(keys)):
tags.append((self.stringtable[keys[i]], self.stringtable[vals[i]]))
yield (node.id, tags, (node.lon, node.lat))

def ways(self):
"""
Return an iterator for all *ways* in this primitive block.

:rtype: iterator of ``(osm_id, tags, [ref1, ref2, ...])`` tuples
"""
for group in self.primitivegroup:
@@ -230,7 +236,7 @@ def ways(self):
keys = way.keys
vals = way.vals
delta_refs = way.refs

tags = {}
for i in xrange(len(keys)):
tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]]
@@ -240,13 +246,13 @@ def ways(self):
ref += delta
refs.append(ref)
yield (way.id, tags, refs)

def relations(self):
"""
Return an iterator for all *relations* in this primitive block.

:rtype: iterator of ``(osm_id, tags, [(ref1, type, role), ...])`` tuples

"""
for group in self.primitivegroup:
relations = group.relations
@@ -266,13 +272,13 @@
for i in xrange(len(keys)):
tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]]
yield (relation.id, tags, members)

class PBFHeader(object):
def __init__(self, filename, blob_pos, blob_size):
data = read_blob_data(filename, blob_pos, blob_size)
self.header_block = OSMPBF.HeaderBlock()
self.header_block.ParseFromString(data)

def required_features(self):
return set(self.header_block.required_features)

@@ -284,7 +290,7 @@ def read_blob_data(filename, blob_pos, blob_size):
with open(filename, 'rb') as f:
f.seek(blob_pos)
blob_data = f.read(blob_size)

blob = OSMPBF.Blob()
blob.ParseFromString(blob_data)
raw_data = blob.raw
@@ -297,10 +303,10 @@ def read_blob_data(filename, blob_pos, blob_size):
class PBFFile(object):
"""
OSM PBF file reader.

Parses the low-level file structure with header sizes,
offsets and blob headers.

:param filename: path to the PBF file
"""
def __init__(self, filename):
@@ -310,40 +316,40 @@ def __init__(self, filename):
header_offsets = self._skip_header()
self.header = PBFHeader(self.filename, header_offsets['blob_pos'], header_offsets['blob_size'])
self.check_features()

def check_features(self):
missing_features = self.header.required_features().difference(SUPPORTED_FEATURES)
if missing_features:
raise NotImplementedError(
'%s requires features not implemented by this parser: %s' %
(self.filename, ', '.join(missing_features))
)

def _skip_header(self):
return self.blob_offsets().next()

def seek(self, pos):
self.next_blob_pos = pos

def rewind(self):
self.next_blob_pos = self.prev_blob_pos

def blob_offsets(self):
"""
Returns an iterator of the blob offsets in this file.

Each offset is stored in a dictionary with:

- `filename` the path of this PBF file.
- `blob_pos` the byte offset
- `blob_size` the size of this blob in bytes
"""
while True:
self.file.seek(self.next_blob_pos)

blob_header_size = self._blob_header_size()
if not blob_header_size: break

blob_size = self._blob_size(self.file.read(blob_header_size))
blob_pos = self.next_blob_pos + 4 + blob_header_size
blob_header_pos=self.next_blob_pos,
@@ -354,22 +360,22 @@ def blob_offsets(self):
blob_header_pos=blob_header_pos,
prev_blob_header_pos=prev_blob_header_pos,
filename=self.filename)

def primitive_block_parsers(self):
"""
Returns an iterator of PrimitiveBlockParser.
"""
for pos in self.blob_offsets():
yield PrimitiveBlockParser(self.filename, pos['blob_pos'], pos['blob_size'])

def _blob_size(self, data):
blob_header = OSMPBF.BlobHeader()
blob_header.ParseFromString(data)
return blob_header.datasize

def _blob_header_size(self):
bytes = self.file.read(4)
if bytes:
if bytes:
return struct.unpack('!i', bytes)[0]
return None

@@ -393,5 +399,5 @@ def read_pbf(filename):
times = t.repeat(r,n)
avrg_times = []
for time in times:
avrg_times.append(time/n)
avrg_times.append(time/n)
print "avrg time/call: %f" %(min(avrg_times))