Skip to content

Commit

Permalink
Add fuzzing for charset and for detector functions (#299)
Browse files Browse the repository at this point in the history
* Add fuzzing for charset detector functions

* Add fuzzing for whole library

* Upgrade golang CI version to 1.18
  • Loading branch information
gabriel-vasile authored Jul 4, 2022
1 parent 42b3f45 commit 6398b29
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 10 deletions.
19 changes: 9 additions & 10 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,38 @@ permissions:

jobs:
lint:
strategy:
matrix:
go-version: ["1.18.x"]
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/[email protected]
with:
go-version: 1.16
go-version: "1.18.x"
- name: Checkout code
uses: actions/[email protected]
- name: Run linters
uses: golangci/[email protected]
with:
version: "v1.37.1"
go-version: ${{ matrix.go-version }}
version: "v1.45.2"
go-version: "1.18.x"

test:
strategy:
matrix:
go-version: ["1.12.0", "1.18.x"]
platform: [ubuntu-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
if: success()
uses: actions/[email protected]
with:
go-version: ${{ matrix.go-version }}
go-version: "1.18.x"
- name: Checkout code
uses: actions/[email protected]
- name: Run tests
run: go test -race ./...
- run: go test -race ./...
- run: go test -fuzz=. -fuzztime=30s
- run: go test -fuzz=Plain -fuzztime=30s ./internal/charset
- run: go test -fuzz=XML -fuzztime=30s ./internal/charset
- run: go test -fuzz=HTML -fuzztime=30s ./internal/charset

coverage:
runs-on: ubuntu-latest
Expand Down
55 changes: 55 additions & 0 deletions internal/charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,58 @@ func TestFromPlain(t *testing.T) {
}
}
}

// FuzzFromPlain fuzzes FromPlain with arbitrary byte slices. The result is
// not validated; inputs for which FromPlain returns an empty charset are
// skipped.
func FuzzFromPlain(f *testing.F) {
	// Seed corpus: two raw high-byte sequences and the string "æøå".
	f.Add([]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85})
	f.Add([]byte{0xe6, 0xf8, 0xe5})
	f.Add([]byte("æøå"))

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromPlain(input) != "" {
			return
		}
		t.Skip()
	})
}
// FuzzFromHTML fuzzes FromHTML with arbitrary byte slices, seeded with a
// handful of <meta> tag variants. The result is not validated; inputs for
// which FromHTML returns an empty charset are skipped.
func FuzzFromHTML(f *testing.F) {
	for _, seed := range [...]string{
		`<meta charset="c">`,
		`<meta charset="щ">`,
		`<meta http-equiv="content-type" content="a/b; charset=c">`,
		`<meta http-equiv="content-type" content="a/b; charset=щ">`,
		`<f 1=2 /><meta charset="c">`,
		`<f a=2><meta http-equiv="content-type" content="a/b; charset=c">`,
		`<f 1=2 /><meta b="b" charset="c">`,
		`<f a=2><meta b="b" http-equiv="content-type" content="a/b; charset=c">`,
	} {
		f.Add([]byte(seed))
	}

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromHTML(input) != "" {
			return
		}
		t.Skip()
	})
}
// FuzzFromXML fuzzes FromXML with arbitrary byte slices, seeded with a single
// XML declaration carrying an encoding attribute. The result is not
// validated; inputs for which FromXML returns an empty charset are skipped.
func FuzzFromXML(f *testing.F) {
	f.Add([]byte(`<?xml version="1.0" encoding="c"?>`))

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromXML(input) != "" {
			return
		}
		t.Skip()
	})
}
39 changes: 39 additions & 0 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"mime"
"os"
Expand Down Expand Up @@ -602,3 +603,41 @@ func TestExtend(t *testing.T) {
})
}
}

// FuzzMimetype runs every detector (except the root) against fuzzed input.
//
// Because of the random nature of fuzzing there is no obvious way to test the
// correctness of the Detect results. Still, there is value in fuzzing in
// search of panics. Inputs that no detector matches are skipped.
func FuzzMimetype(f *testing.F) {
	// Maximum number of leading bytes taken from each seed file.
	const seedLen = 100

	// Some of the more interesting file formats. Most formats are detected by
	// checking some magic numbers in headers, but these have more complicated
	// detection algorithms.
	corpus := []string{
		"testdata/mkv.mkv",
		"testdata/webm.webm",
		"testdata/docx.docx",
		"testdata/pptx.pptx",
		"testdata/xlsx.xlsx",
		"testdata/3gp.3gp",
		"testdata/class.class",
	}
	for _, c := range corpus {
		data, err := ioutil.ReadFile(c)
		if err != nil {
			f.Fatal(err)
		}
		// Truncate only when the file is long enough; slicing a shorter file
		// with data[:seedLen] would panic with an out-of-range error.
		if len(data) > seedLen {
			data = data[:seedLen]
		}
		f.Add(data)
	}

	// First node is root. Remove it because it matches any input.
	detectors := root.flatten()[1:]
	f.Fuzz(func(t *testing.T, data []byte) {
		matched := false
		for _, d := range detectors {
			// Deliberately no break on the first match: every detector has
			// to see the input so that a panic in any of them is found.
			if d.detector(data, math.MaxUint32) {
				matched = true
			}
		}
		if !matched {
			t.Skip()
		}
	})
}

0 comments on commit 6398b29

Please sign in to comment.