Skip to content

Commit

Permalink
Merge pull request #245 from lonvia/file-processor
Browse files Browse the repository at this point in the history
Introduce iterative processing
  • Loading branch information
lonvia authored Mar 17, 2024
2 parents 5e231e4 + e3a5a6d commit e3038c3
Show file tree
Hide file tree
Showing 23 changed files with 776 additions and 120 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ pybind11_add_module(_osmium
lib/merge_input_reader.cc
lib/node_location_handler.cc
lib/simple_writer.cc
lib/write_handler.cc)
lib/write_handler.cc
lib/file_iterator.cc)
set_module_output(_osmium osmium)
pybind11_add_module(_replication lib/replication.cc)
set_module_output(_replication osmium/replication)
Expand Down
24 changes: 7 additions & 17 deletions examples/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,20 @@

import sys

class Convert(o.SimpleHandler):

def __init__(self, writer):
super(Convert, self).__init__()
self.writer = writer

def node(self, n):
self.writer.add_node(n)

def way(self, w):
self.writer.add_way(w)

def relation(self, r):
self.writer.add_relation(r)

if __name__ == '__main__':
if len(sys.argv) != 3:
print("Usage: python convert.py <infile> <outfile>")
sys.exit(-1)

writer = o.SimpleWriter(sys.argv[2])
handler = Convert(writer)

handler.apply_file(sys.argv[1])
for obj in o.FileProcessor(sys.argv[1]):
if obj.is_node():
writer.add_node(obj)
elif obj.is_way():
writer.add_way(obj)
elif obj.is_relation():
writer.add_relation(obj)

writer.close()

2 changes: 1 addition & 1 deletion examples/convert_to_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def print_object(self, geojson, tags):
def main(osmfile):
handler = GeoJsonWriter()

handler.apply_file(osmfile)
handler.apply_file(osmfile,filters=[o.filter.EmptyTagFilter().apply_to(o.osm.NODE)])
handler.finish()

return 0
Expand Down
50 changes: 19 additions & 31 deletions examples/filter_coastlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,9 @@
we are interested in and remember the nodes required. Then, in a second
run all the relevant nodes and ways are written out.
"""

import osmium as o
import sys

class WayFilter(o.SimpleHandler):

def __init__(self):
super(WayFilter, self).__init__()
self.nodes = set()

def way(self, w):
if 'natural' in w.tags and w.tags['natural'] == 'coastline':
for n in w.nodes:
self.nodes.add(n.ref)


class CoastlineWriter(o.SimpleHandler):

def __init__(self, writer, nodes):
super(CoastlineWriter, self).__init__()
self.writer = writer
self.nodes = nodes

def node(self, n):
if n.id in self.nodes:
self.writer.add_node(n)

def way(self, w):
if 'natural' in w.tags and w.tags['natural'] == 'coastline':
self.writer.add_way(w)


if __name__ == '__main__':
if len(sys.argv) != 3:
Expand All @@ -46,11 +18,27 @@ def way(self, w):


# go through the ways to find all relevant nodes
ways = WayFilter()
ways.apply_file(sys.argv[1])
nodes = set()
# Pre-filter the ways by tags. The fewer objects we need to look at, the better.
way_filter = o.filter.KeyFilter('natural')
# only scan the ways of the file
for obj in o.FileProcessor(sys.argv[1], o.osm.WAY).with_filter(way_filter):
if obj.tags['natural'] == 'coastline':
nodes.update(n.ref for n in obj.nodes)


# go through the file again and write out the data
writer = o.SimpleWriter(sys.argv[2])
CoastlineWriter(writer, ways.nodes).apply_file(sys.argv[1])

# This time the pre-filtering should only apply to ways.
way_filter = o.filter.KeyFilter('natural').enable_for(o.osm.WAY)

# We need nodes and ways in the second pass.
for obj in o.FileProcessor(sys.argv[1], o.osm.WAY | o.osm.NODE).with_filter(way_filter):
if obj.is_node() and obj.id in nodes:
# Strip the object of tags along the way
writer.add_node(obj.replace(tags={}))
elif obj.is_way() and obj.tags['natural'] == 'coastline':
writer.add_way(obj)

writer.close()
25 changes: 11 additions & 14 deletions examples/osm_diff_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import osmium as o
import sys

class Stats(object):
class Stats:

def __init__(self):
self.added = 0
self.modified = 0
self.deleted = 0

def __call__(self, o):
def add(self, o):
if o.deleted:
self.deleted += 1
elif o.version == 1:
Expand All @@ -23,23 +23,20 @@ def __call__(self, o):
self.modified += 1

def outstats(self, prefix):
print("%s added: %d" % (prefix, self.added))
print("%s modified: %d" % (prefix, self.modified))
print("%s deleted: %d" % (prefix, self.deleted))
print(f"{prefix} added: {self.added}")
print(f"{prefix} modified: {self.modified}")
print(f"{prefix} deleted: {self.deleted}")


def main(osmfile):
nodes = Stats()
ways = Stats()
rels = Stats()
stats = {t: Stats() for t in 'nwr'}

h = o.make_simple_handler(node=nodes, way=ways, relation=rels)
for obj in o.FileProcessor(osmfile):
stats[obj.type_str()].add(obj)

h.apply_file(osmfile)

nodes.outstats("Nodes")
ways.outstats("Ways")
rels.outstats("Relations")
stats['n'].outstats("Nodes")
stats['w'].outstats("Ways")
stats['r'].outstats("Relations")

return 0

Expand Down
5 changes: 1 addition & 4 deletions examples/osm_url_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
"""
import osmium as o
import sys
try:
import urllib.request as urlrequest
except ImportError:
import urllib2 as urlrequest
import urllib.request as urlrequest

class FileStatsHandler(o.SimpleHandler):
def __init__(self):
Expand Down
18 changes: 5 additions & 13 deletions examples/pub_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,12 @@
import osmium
import sys

class NamesHandler(osmium.SimpleHandler):

def output_pubs(self, tags):
if tags.get('amenity') == 'pub' and 'name' in tags:
print(tags['name'])

def node(self, n):
self.output_pubs(n.tags)

def way(self, w):
self.output_pubs(w.tags)

def main(osmfile):
NamesHandler().apply_file(osmfile)
for obj in osmium.FileProcessor(osmfile)\
.with_filter(osmium.filter.KeyFilter('amenity'))\
.with_filter(osmium.filter.KeyFilter('name')):
if obj.tags['amenity'] == 'pub':
print(obj.tags['name'])

return 0

Expand Down
27 changes: 11 additions & 16 deletions examples/road_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,22 @@
import osmium as o
import sys

class RoadLengthHandler(o.SimpleHandler):
def __init__(self):
super(RoadLengthHandler, self).__init__()
self.length = 0.0

def way(self, w):
if 'highway' in w.tags:
def main(osmfile):
total = 0.0
# As we need the way geometry, the node locations need to be cached.
# This is enabled with the with_locations() function.
for obj in o.FileProcessor(osmfile, o.osm.NODE | o.osm.WAY)\
.with_locations()\
.with_filter(o.filter.KeyFilter('highway')):
if obj.is_way():
try:
self.length += o.geom.haversine_distance(w.nodes)
total += o.geom.haversine_distance(obj.nodes)
except o.InvalidLocationError:
# A location error might occur if the osm file is an extract
# where nodes of ways near the boundary are missing.
print("WARNING: way %d incomplete. Ignoring." % w.id)

def main(osmfile):
h = RoadLengthHandler()
# As we need the geometry, the node locations need to be cached. Therefore
# set 'locations' to true.
h.apply_file(osmfile, locations=True)
print("WARNING: way %d incomplete. Ignoring." % obj.id)

print('Total way length: %.2f km' % (h.length/1000))
print('Total way length: %.2f km' % (total/1000))

return 0

Expand Down
5 changes: 2 additions & 3 deletions examples/use_nodecache.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import osmium as o
import sys

class WayHandler(o.SimpleHandler):
class WayHandler:

def __init__(self, idx):
super(WayHandler, self).__init__()
self.idx = idx

def way(self, w):
Expand All @@ -13,7 +12,7 @@ def way(self, w):
print("%d %s" % (w.id, len(w.nodes)))

if len(sys.argv) != 3:
print("Usage: python create_nodecache.py <osm file> <node cache>")
print("Usage: python use_nodecache.py <osm file> <node cache>")
exit()

reader = o.io.Reader(sys.argv[1], o.osm.osm_entity_bits.WAY)
Expand Down
54 changes: 46 additions & 8 deletions lib/area.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,21 @@

#include "base_handler.h"
#include "handler_chain.h"
#include "buffer_iterator.h"

namespace py = pybind11;

namespace {

using MpManager = osmium::area::MultipolygonManager<osmium::area::Assembler>;

class AreaManagerSecondPassHandler : public BaseHandler
class AreaManagerSecondPassHandlerBase : public BaseHandler
{
public:
AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args)
: m_mp_manager(mp_manager), m_args(args), m_handlers(m_args)
{
m_mp_manager->set_callback([this](osmium::memory::Buffer &&ab)
{ osmium::apply(ab, this->m_handlers); });
}
AreaManagerSecondPassHandlerBase(MpManager *mp_manager)
: m_mp_manager(mp_manager)
{}


bool node(osmium::Node const *n) override
{
Expand All @@ -53,10 +52,37 @@ class AreaManagerSecondPassHandler : public BaseHandler
m_mp_manager->flush_output();
}

private:
protected:
MpManager *m_mp_manager;
};


// Second-pass handler that forwards every output buffer of the multipolygon
// manager to a chain of handlers built from the constructor arguments.
class AreaManagerSecondPassHandler : public AreaManagerSecondPassHandlerBase
{
public:
// mp_manager: multipolygon manager handed on to the base class; this
//             constructor sets its output callback.
// args:       positional arguments from which the HandlerChain is built.
AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args)
: AreaManagerSecondPassHandlerBase(mp_manager), m_args(args), m_handlers(m_args)
{
// Every buffer emitted by the manager is applied to the handler chain.
// The lambda captures `this`, so the callback must not be invoked after
// this object has been destroyed.
m_mp_manager->set_callback([this](osmium::memory::Buffer &&ab)
{ osmium::apply(ab, this->m_handlers); });
}

private:
// Copy of the constructor arguments. It must stay declared before
// m_handlers: m_handlers is initialised from it and members are initialised
// in declaration order.
// NOTE(review): presumably also kept so that the Python handler objects stay
// referenced while the chain is in use — confirm against HandlerChain.
py::args m_args;
// Handler chain that receives the buffers delivered by the callback.
HandlerChain m_handlers;

};


// Second-pass handler that hands every output buffer of the multipolygon
// manager to a BufferIterator instead of applying handlers to it.
class AreaManagerBufferHandler : public AreaManagerSecondPassHandlerBase
{
public:
// mp_manager: multipolygon manager passed on to the base class; this
//             constructor sets its output callback.
// cb:         receiver of the buffers. Only the raw pointer is captured by
//             the callback, so the pointee has to outlive every callback
//             invocation.
AreaManagerBufferHandler(MpManager *mp_manager, pyosmium::BufferIterator *cb)
: AreaManagerSecondPassHandlerBase(mp_manager)
{
// The buffer is passed on as an rvalue.
// NOTE(review): presumably add_buffer() takes the buffer over without
// copying — confirm against BufferIterator.
m_mp_manager->set_callback([cb](osmium::memory::Buffer &&ab)
{ cb->add_buffer(std::move(ab)); });
}
};


Expand All @@ -82,6 +108,12 @@ class AreaManager : public BaseHandler
return new AreaManagerSecondPassHandler(&m_mp_manager, args);
}

// Create the handler for the second pass that collects the manager's output
// buffers in `cb`.
//
// Calls prepare_for_lookup() on the multipolygon manager and then returns a
// newly allocated AreaManagerBufferHandler. The handler is given a pointer to
// this object's m_mp_manager and the raw `cb` pointer.
// NOTE(review): the handler appears not to own either pointer, so both
// presumably have to outlive it — confirm.
// NOTE(review): the result comes from a plain `new`; the caller (presumably
// the pybind11 binding) is expected to take ownership — confirm.
AreaManagerBufferHandler *second_pass_to_buffer(pyosmium::BufferIterator *cb)
{
m_mp_manager.prepare_for_lookup();
return new AreaManagerBufferHandler(&m_mp_manager, cb);
}

private:
osmium::area::Assembler::config_type m_assembler_config;
osmium::area::MultipolygonManager<osmium::area::Assembler> m_mp_manager;
Expand All @@ -93,6 +125,8 @@ PYBIND11_MODULE(_area, m)
{
py::class_<AreaManagerSecondPassHandler, BaseHandler>(m,
"AreaManagerSecondPassHandler");
py::class_<AreaManagerBufferHandler, BaseHandler>(m,
"AreaManagerBufferHandler");

py::class_<AreaManager, BaseHandler>(m, "AreaManager",
"Object manager class that manages building area objects from "
Expand All @@ -107,5 +141,9 @@ PYBIND11_MODULE(_area, m)
"file, where areas are assembled. Pass the handlers that "
"should handle the areas.",
py::return_value_policy::take_ownership, py::keep_alive<1, 2>())
.def("second_pass_to_buffer", &AreaManager::second_pass_to_buffer,
py::keep_alive<1, 2>(),
"Return a handler object for the second pass of the file. "
"The handler holds a buffer, which can be iterated over.")
;
}
Loading

0 comments on commit e3038c3

Please sign in to comment.