Skip to content

Commit

Permalink
replace czlib with go-libdeflate
Browse files Browse the repository at this point in the history
  • Loading branch information
oflebbe committed Sep 23, 2023
1 parent e652b50 commit bd5d16b
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 56 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ BenchmarkChange_UnmarshalJSON-12 287707 317723 +10.43%
## CGO and zlib

OSM PBF data comes in blocks, each block is zlib compressed. Decompressing this
data takes about 33% of the total read time. [DataDog/czlib](https://github.com/DataDog/czlib) is
used to speed this process.
See [osmpbf/README.md](osmpbf#using-cgoczlib-for-decompression) for more details.
data takes about 33% of the total read time. [4kills/go-libdeflate](https://github.com/4kills/go-libdeflate) is
used to speed up decompressing.
See [osmpbf/README.md](osmpbf#using-cgo-libdeflate-for-decompression) for more details.

As a result, a C compiler is necessary to install this module. On macOS this may require
installing pkg-config using something like `brew install pkg-config`
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/paulmach/osm
go 1.13

require (
github.com/datadog/czlib v0.0.0-20160811164712-4bc9a24e37f2
github.com/4kills/go-libdeflate/v2 v2.0.3
github.com/paulmach/orb v0.1.3
github.com/paulmach/protoscan v0.2.1
golang.org/x/time v0.0.0-20190921001708-c4c64cad1fd0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/datadog/czlib v0.0.0-20160811164712-4bc9a24e37f2 h1:ISaMhBq2dagaoptFGUyywT5SzpysCbHofX3sCNw1djo=
github.com/datadog/czlib v0.0.0-20160811164712-4bc9a24e37f2/go.mod h1:2yDaWzisHKoQoxm+EU4YgKBaD7g1M0pxy7THWG44Lro=
github.com/4kills/go-libdeflate/v2 v2.0.3 h1:Y13oRUvtAXFJkcW4F0MnaQQB753a71sTutGrVbEAubQ=
github.com/4kills/go-libdeflate/v2 v2.0.3/go.mod h1:hyouZv4OAhHaaMpYuejstUN0xOg8mA+yy75WE3Ty6SM=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
Expand Down
48 changes: 28 additions & 20 deletions osmpbf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,31 +80,39 @@ This package supports reading OSM PBF files where the ways have been annotated w

Coordinates are stored in the `Lat` and `Lon` fields of each `WayNode`. There is no need to specify an explicit option; when the node locations are present on the ways, they are loaded automatically. For more info about the OSM PBF format extension, see [the original blog post](https://blog.jochentopf.com/2016-04-20-node-locations-on-ways.html).

## Using cgo/czlib for decompression
## Using cgo libdeflate for decompression

OSM PBF files are a set of blocks that are zlib compressed. When using the pure golang
implementation this can account for about 1/3 of the read time. When cgo is enabled
the package will used [czlib](https://github.com/DataDog/czlib).
the package [go-libdeflate](https://github.com/4kills/go-libdeflate) will be used.

Previous versions used the czlib library, which is based on zlib. libdeflate is
faster and more memory-efficient for decompression.

```
$ CGO_ENABLED=0 go test -bench . > disabled.txt
$ CGO_ENABLED=1 go test -bench . > enabled.txt
$ benchcmp disabled.txt enabled.txt
benchmark old ns/op new ns/op delta
BenchmarkLondon-12 312294630 229927205 -26.37%
BenchmarkLondon_nodes-12 246562457 160021768 -35.10%
BenchmarkLondon_ways-12 216803544 134747327 -37.85%
BenchmarkLondon_relations-12 158722633 80560144 -49.24%
benchmark old allocs new allocs delta
BenchmarkLondon-12 2469128 2416804 -2.12%
BenchmarkLondon_nodes-12 1056166 1003850 -4.95%
BenchmarkLondon_ways-12 1845032 1792716 -2.84%
BenchmarkLondon_relations-12 509090 456772 -10.28%
benchmark old bytes new bytes delta
BenchmarkLondon-12 963734544 954877896 -0.92%
BenchmarkLondon_nodes-12 658337435 649482060 -1.35%
BenchmarkLondon_ways-12 441674734 432819378 -2.00%
BenchmarkLondon_relations-12 187941609 179086389 -4.71%
```
benchmark old ns/op new ns/op delta
BenchmarkLondon-8 361519289 275254714 -23.86%
BenchmarkLondon_withFiltersTrue-8 392469042 263935960 -32.75%
BenchmarkLondon_withFiltersFalse-8 310824940 200477972 -35.50%
BenchmarkLondon_nodes-8 295277528 180614979 -38.83%
BenchmarkLondon_ways-8 257494509 140700970 -45.36%
BenchmarkLondon_relations-8 189490128 75263200 -60.28%
benchmark old allocs new allocs delta
BenchmarkLondon-8 4863784 4808526 -1.14%
BenchmarkLondon_withFiltersTrue-8 4863786 4808515 -1.14%
BenchmarkLondon_withFiltersFalse-8 1419995 1364724 -3.89%
BenchmarkLondon_nodes-8 3450825 3395559 -1.60%
BenchmarkLondon_ways-8 1851359 1796099 -2.98%
BenchmarkLondon_relations-8 515422 460152 -10.72%
benchmark old bytes new bytes delta
BenchmarkLondon-8 947061317 924789892 -2.35%
BenchmarkLondon_withFiltersTrue-8 947061146 924787588 -2.35%
BenchmarkLondon_withFiltersFalse-8 388725836 366452840 -5.73%
BenchmarkLondon_nodes-8 641663624 619391213 -3.47%
BenchmarkLondon_ways-8 460631859 438360054 -4.84%
BenchmarkLondon_relations-8 206899749 184626277 -10.77%
23 changes: 1 addition & 22 deletions osmpbf/decode.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package osmpbf

import (
"bytes"
"context"
"encoding/binary"
"errors"
Expand Down Expand Up @@ -344,28 +343,8 @@ func getData(blob *osmpbf.Blob, data []byte) ([]byte, error) {
return blob.GetRaw(), nil

case blob.ZlibData != nil:
r, err := zlibReader(blob.GetZlibData())
if err != nil {
return nil, err
}

// using the bytes.Buffer allows for the preallocation of the necessary space.
l := blob.GetRawSize() + bytes.MinRead
if cap(data) < int(l) {
data = make([]byte, 0, l+l/10)
} else {
data = data[:0]
}
buf := bytes.NewBuffer(data)
if _, err = buf.ReadFrom(r); err != nil {
return nil, err
}

if buf.Len() != int(blob.GetRawSize()) {
return nil, fmt.Errorf("raw blob data size %d but expected %d", buf.Len(), blob.GetRawSize())
}
return decompress(blob.GetZlibData(), (int)(blob.GetRawSize()), data)

return buf.Bytes(), nil
default:
return nil, errors.New("unknown blob data")
}
Expand Down
21 changes: 16 additions & 5 deletions osmpbf/zlib_cgo.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
//go:build cgo
// +build cgo

package osmpbf

import (
"bytes"
"io"
"fmt"

"github.com/datadog/czlib"
deflate "github.com/4kills/go-libdeflate/v2"
)

func zlibReader(data []byte) (io.ReadCloser, error) {
return czlib.NewReader(bytes.NewReader(data))
// decompress inflates the zlib-compressed bytes in `in` using libdeflate and
// returns the uncompressed blob.
//
// size is the expected uncompressed length (the blob's raw size). data is an
// optional scratch buffer: when its capacity is at least size it is resliced
// to exactly size bytes and handed to libdeflate as the output buffer,
// otherwise nil is passed and the library allocates the output itself.
//
// An error is returned when size is negative, when libdeflate reports a
// failure, or when the decompressed length differs from size. The returned
// slice is nil whenever the error is non-nil.
func decompress(in []byte, size int, data []byte) ([]byte, error) {
	// size originates from the file; a negative value would otherwise panic
	// in the reslice below.
	if size < 0 {
		return nil, fmt.Errorf("invalid raw blob size %d", size)
	}

	// Per the go-libdeflate docs, a caller-supplied output buffer must have
	// exactly the decompressed length, so reslice to size rather than
	// passing the full capacity.
	if cap(data) >= size {
		data = data[:size]
	} else {
		data = nil
	}

	_, buf, err := deflate.DecompressZlib(in, data)
	if err != nil {
		// Report the real decompression failure instead of masking it with
		// a size-mismatch error.
		return nil, err
	}

	if len(buf) != size {
		return nil, fmt.Errorf("raw blob data size %d but expected %d", len(buf), size)
	}

	return buf, nil
}
27 changes: 24 additions & 3 deletions osmpbf/zlib_go.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,34 @@
//go:build !cgo
// +build !cgo

package osmpbf

import (
"bytes"
"compress/zlib"
"io"
"fmt"
)

func zlibReader(data []byte) (io.ReadCloser, error) {
return zlib.NewReader(bytes.NewReader(data))
// decompress inflates the zlib-compressed bytes in `in` with the standard
// library and returns the uncompressed blob.
//
// size is the expected uncompressed length. data is an optional scratch
// buffer that is reused as the output storage when its capacity is large
// enough; otherwise a fresh buffer is allocated. An error is returned if the
// stream cannot be read or if the decompressed length differs from size.
func decompress(in []byte, size int, data []byte) ([]byte, error) {
	zr, err := zlib.NewReader(bytes.NewReader(in))
	if err != nil {
		return nil, err
	}

	// bytes.Buffer.ReadFrom does not grow the buffer as long as bytes.MinRead
	// spare bytes remain beyond the content, so reserve that headroom up
	// front to keep the read to a single allocation (or none when data is
	// reused).
	want := size + bytes.MinRead
	if cap(data) >= want {
		data = data[:0]
	} else {
		data = make([]byte, 0, want+want/10)
	}

	out := bytes.NewBuffer(data)
	if _, err := out.ReadFrom(zr); err != nil {
		return nil, err
	}

	if got := out.Len(); got != size {
		return nil, fmt.Errorf("raw blob data size %d but expected %d", got, size)
	}
	return out.Bytes(), nil
}

0 comments on commit bd5d16b

Please sign in to comment.