diff --git a/Cargo.lock b/Cargo.lock index 529587d..368b9e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -327,6 +327,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "quick-xml" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.23" @@ -404,9 +413,9 @@ dependencies = [ "osmpbf", "owning_ref", "png", + "quick-xml", "stb_truetype", "tini", - "xml-rs", ] [[package]] @@ -519,9 +528,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "xml-rs" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" diff --git a/Cargo.toml b/Cargo.toml index 7801e66..e6e08d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,9 +12,9 @@ indexmap = "*" memmap = "*" owning_ref = "*" png = "*" +quick-xml = "*" stb_truetype = "*" tini = "*" -xml-rs = "*" [dependencies.osmpbf] version = "*" diff --git a/src/geodata/importer.rs b/src/geodata/importer.rs index 9a7884c..f640c48 100644 --- a/src/geodata/importer.rs +++ b/src/geodata/importer.rs @@ -4,6 +4,10 @@ use crate::geodata::saver::save_to_internal_format; use anyhow::{anyhow, bail, Context, Result}; #[cfg(feature = "pbf")] use osmpbf::{Element, ElementReader, RelMemberType}; +use quick_xml::events::attributes::Attributes; +use quick_xml::events::{BytesStart, Event}; +use quick_xml::reader::Reader; +use std::borrow::Cow; use std::collections::HashSet; use std::collections::{BTreeMap, HashMap}; use std::ffi::OsStr; @@ -11,8 +15,6 @@ use std::fs::File; use std::io::prelude::*; use std::io::{BufReader, BufWriter}; use std::path::Path; -use xml::attribute::OwnedAttribute; -use xml::reader::{EventReader, XmlEvent}; pub fn import>(input: P, output: P) -> Result<()> { let output_file = File::create(output.as_ref()).context(format!( @@ -27,7 +29,7 @@ pub fn import>(input: P, output: P) -> Result<()> { "Failed to open {} for reading", input.as_ref().to_string_lossy() ))?; - let parser = EventReader::new(BufReader::new(input_file)); + let parser = Reader::from_reader(BufReader::new(input_file)); parse_osm_xml(parser)? } #[cfg(feature = "pbf")] @@ -181,7 +183,7 @@ fn parse_pbf>(input: P) -> Result { Ok(entity_storages) } -fn parse_osm_xml(mut parser: EventReader) -> Result { +fn parse_osm_xml(mut parser: Reader) -> Result { let mut entity_storages = EntityStorages { node_storage: OsmEntityStorage::new(), way_storage: OsmEntityStorage::new(), @@ -192,19 +194,33 @@ fn parse_osm_xml(mut parser: EventReader) -> Result let mut elem_count = 0; println!("Parsing XML"); + let mut buf = Vec::new(); loop { - let e = parser.next().context("Failed to parse the input file")?; - match e { - XmlEvent::EndDocument => break, - XmlEvent::StartElement { name, attributes, .. } => { - process_element(&name.local_name, &attributes, &mut entity_storages, &mut parser)?; - elem_count += 1; - if elem_count % 100_000 == 0 { - print_storage_stats(&entity_storages); - } + let e = parser + .read_event_into(&mut buf) + .context("Failed to parse the input file")?; + let mut on_elem = |start: BytesStart, have_subelements: bool| -> Result<()> { + process_element( + &mut parser, + start.local_name().as_ref(), + &mut start.attributes(), + &mut entity_storages, + have_subelements, + )?; + elem_count += 1; + if elem_count % 100_000 == 0 { + print_storage_stats(&entity_storages); } + Ok(()) + }; + match e { + Event::Eof => break, + Event::Start(start) => on_elem(start, true)?, + Event::Empty(start) => on_elem(start, false)?, _ => {} } + // The official `quick-xml` examples suggests we do this to save memory. + buf.clear(); } print_storage_stats(&entity_storages); @@ -212,46 +228,53 @@ fn parse_osm_xml(mut parser: EventReader) -> Result Ok(entity_storages) } -fn process_element( - name: &str, - attrs: &[OwnedAttribute], +fn process_element( + parser: &mut Reader, + name: &[u8], + attrs: &mut Attributes, entity_storages: &mut EntityStorages, - parser: &mut EventReader, + have_subelements: bool, ) -> Result<()> { match name { - "node" => { + b"node" => { let mut node = RawNode { - global_id: get_id(name, attrs)?, - lat: parse_required_attr(name, attrs, "lat")?, - lon: parse_required_attr(name, attrs, "lon")?, + global_id: get_id(parser, name, attrs)?, + lat: parse_required_attr(parser, name, attrs, b"lat")?, + lon: parse_required_attr(parser, name, attrs, b"lon")?, tags: RawTags::default(), }; - process_subelements(name, &mut node, entity_storages, process_node_subelement, parser)?; + if have_subelements { + process_subelements(name, &mut node, entity_storages, process_node_subelement, parser)?; + } entity_storages.node_storage.add(node.global_id, node); } - "way" => { + b"way" => { let mut way = RawWay { - global_id: get_id(name, attrs)?, + global_id: get_id(parser, name, attrs)?, node_ids: RawRefs::default(), tags: RawTags::default(), }; - process_subelements(name, &mut way, entity_storages, process_way_subelement, parser)?; + if have_subelements { + process_subelements(name, &mut way, entity_storages, process_way_subelement, parser)?; + } postprocess_node_refs(&mut way.node_ids); entity_storages.way_storage.add(way.global_id, way); } - "relation" => { + b"relation" => { let mut relation = RawRelation { - global_id: get_id(name, attrs)?, + global_id: get_id(parser, name, attrs)?, way_refs: Vec::::default(), tags: RawTags::default(), }; - process_subelements( - name, - &mut relation, - entity_storages, - process_relation_subelement, - parser, - )?; + if have_subelements { + process_subelements( + name, + &mut relation, + entity_storages, + process_relation_subelement, + parser, + )?; + } if relation.tags.iter().any(|(k, v)| k == "type" && v == "multipolygon") { let segments = relation.to_segments(entity_storages); if let Some(polygons) = find_polygons_in_multipolygon(relation.global_id, &segments) { @@ -275,29 +298,35 @@ fn process_element( Ok(()) } -fn process_subelements( - entity_name: &str, +fn process_subelements( + entity_name: &[u8], entity: &mut E, entity_storages: &EntityStorages, subelement_processor: F, - parser: &mut EventReader, + parser: &mut Reader, ) -> Result<()> where - F: Fn(&mut E, &EntityStorages, &str, &[OwnedAttribute]) -> Result<()>, + F: Fn(&mut Reader, &mut E, &EntityStorages, &[u8], &mut Attributes) -> Result<()>, { + let mut buf = Vec::new(); loop { - let e = parser.next().context(format!( + let e = parser.read_event_into(&mut buf).context(format!( "Failed to parse the input file when processing {}", - entity_name + ascii_name_as_str(entity_name) ))?; match e { - XmlEvent::EndDocument => break, - XmlEvent::EndElement { ref name } if name.local_name == *entity_name => break, - XmlEvent::StartElement { name, attributes, .. } => { - subelement_processor(entity, entity_storages, &name.local_name, &attributes)? - } + Event::Eof => break, + Event::End(end) if end.local_name().as_ref() == entity_name => break, + Event::Start(start) | Event::Empty(start) => subelement_processor( + parser, + entity, + entity_storages, + start.local_name().as_ref(), + &mut start.attributes(), + )?, _ => {} } + buf.clear(); } Ok(()) } @@ -323,95 +352,125 @@ fn postprocess_node_refs(refs: &mut RawRefs) { *refs = refs_without_duplicates; } -fn process_node_subelement( +fn process_node_subelement( + parser: &mut Reader, node: &mut RawNode, _: &EntityStorages, - sub_name: &str, - sub_attrs: &[OwnedAttribute], + sub_name: &[u8], + sub_attrs: &mut Attributes, ) -> Result<()> { - try_add_tag(sub_name, sub_attrs, &mut node.tags).map(|_| ()) + try_add_tag(parser, sub_name, sub_attrs, &mut node.tags).map(|_| ()) } -fn process_way_subelement( +fn process_way_subelement( + parser: &mut Reader, way: &mut RawWay, entity_storages: &EntityStorages, - sub_name: &str, - sub_attrs: &[OwnedAttribute], + sub_name: &[u8], + sub_attrs: &mut Attributes, ) -> Result<()> { - if try_add_tag(sub_name, sub_attrs, &mut way.tags)? { + if try_add_tag(parser, sub_name, sub_attrs, &mut way.tags)? { return Ok(()); } - if sub_name == "nd" { - if let Some(r) = get_ref(sub_name, sub_attrs, &entity_storages.node_storage)? { + if sub_name == b"nd" { + if let Some(r) = get_ref(parser, sub_name, sub_attrs, &entity_storages.node_storage)? { way.node_ids.push(r); } } Ok(()) } -fn process_relation_subelement( +fn process_relation_subelement( + parser: &mut Reader, relation: &mut RawRelation, entity_storages: &EntityStorages, - sub_name: &str, - sub_attrs: &[OwnedAttribute], + sub_name: &[u8], + sub_attrs: &mut Attributes, ) -> Result<()> { - if try_add_tag(sub_name, sub_attrs, &mut relation.tags)? { + if try_add_tag(parser, sub_name, sub_attrs, &mut relation.tags)? { return Ok(()); } - if sub_name == "member" && get_required_attr(sub_name, sub_attrs, "type")? == "way" { - if let Some(r) = get_ref(sub_name, sub_attrs, &entity_storages.way_storage)? { - let is_inner = get_required_attr(sub_name, sub_attrs, "role")? == "inner"; + if sub_name == b"member" && get_required_attr(parser, sub_name, sub_attrs, b"type")? == "way" { + if let Some(r) = get_ref(parser, sub_name, sub_attrs, &entity_storages.way_storage)? { + let is_inner = get_required_attr(parser, sub_name, sub_attrs, b"role")? == "inner"; relation.way_refs.push(RelationWayRef { way_id: r, is_inner }); } } Ok(()) } -fn get_required_attr<'a>(elem_name: &str, attrs: &'a [OwnedAttribute], attr_name: &str) -> Result<&'a String> { - attrs - .iter() - .filter(|x| x.name.local_name == attr_name) - .map(|x| &x.value) - .next() - .ok_or_else(|| anyhow!("Element {} doesn't have required attribute: {}", elem_name, attr_name)) +fn ascii_name_as_str(elem_name: &[u8]) -> &str { + std::str::from_utf8(elem_name).unwrap_or("N/A") +} + +fn get_required_attr<'a, R: BufRead>( + parser: &mut Reader, + elem_name: &[u8], + attrs: &mut Attributes<'a>, + attr_name: &[u8], +) -> Result> { + for attr in attrs { + let attr = attr?; + if attr.key.local_name().as_ref() == attr_name { + return Ok(attr.decode_and_unescape_value(parser)?); + } + } + Err(anyhow!( + "Element {} doesn't have required attribute: {}", + ascii_name_as_str(elem_name), + ascii_name_as_str(attr_name) + )) } -fn parse_required_attr(elem_name: &str, attrs: &[OwnedAttribute], attr_name: &str) -> Result +fn parse_required_attr( + parser: &mut Reader, + elem_name: &[u8], + attrs: &mut Attributes, + attr_name: &[u8], +) -> Result where T: std::str::FromStr, T::Err: std::error::Error + Send + Sync + 'static, { - let value = get_required_attr(elem_name, attrs, attr_name)?; + let value = get_required_attr(parser, elem_name, attrs, attr_name)?; let parsed_value = value.parse::().context(format!( "Failed to parse the value of attribute {} ({}) for element {}", - attr_name, value, elem_name + ascii_name_as_str(attr_name), + value, + ascii_name_as_str(elem_name) ))?; Ok(parsed_value) } -fn get_ref( - elem_name: &str, - attrs: &[OwnedAttribute], +fn get_ref( + parser: &mut Reader, + elem_name: &[u8], + attrs: &mut Attributes, storage: &OsmEntityStorage, ) -> Result> { - let reference = parse_required_attr(elem_name, attrs, "ref")?; + let reference = parse_required_attr(parser, elem_name, attrs, b"ref")?; Ok(storage.translate_id(reference)) } -fn try_add_tag<'a>(elem_name: &str, attrs: &'a [OwnedAttribute], tags: &mut RawTags) -> Result { - if elem_name != "tag" { +fn try_add_tag( + parser: &mut Reader, + elem_name: &[u8], + attrs: &mut Attributes, + tags: &mut RawTags, +) -> Result { + if elem_name != b"tag" { return Ok(false); } - let key = get_required_attr(elem_name, attrs, "k")?; - let value = get_required_attr(elem_name, attrs, "v")?; - tags.insert(key.clone(), value.clone()); + let key = get_required_attr(parser, elem_name, attrs, b"k")?; + let value = get_required_attr(parser, elem_name, attrs, b"v")?; + tags.insert(key.to_string(), value.to_string()); Ok(true) } -fn get_id(elem_name: &str, attrs: &[OwnedAttribute]) -> Result { - parse_required_attr(elem_name, attrs, "id") +fn get_id(parser: &mut Reader, elem_name: &[u8], attrs: &mut Attributes) -> Result { + parse_required_attr(parser, elem_name, attrs, b"id") } pub(super) type RawRefs = Vec;