Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fuzzing for charset detector functions #299

Merged
merged 10 commits into from
Jul 4, 2022
19 changes: 9 additions & 10 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,38 @@ permissions:

jobs:
lint:
strategy:
matrix:
go-version: ["1.18.x"]
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/[email protected]
with:
go-version: 1.16
go-version: "1.18.x"
- name: Checkout code
uses: actions/[email protected]
- name: Run linters
uses: golangci/[email protected]
with:
version: "v1.37.1"
go-version: ${{ matrix.go-version }}
version: "v1.45.2"
go-version: "1.18.x"

test:
strategy:
matrix:
go-version: ["1.12.0", "1.18.x"]
platform: [ubuntu-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
if: success()
uses: actions/[email protected]
with:
go-version: ${{ matrix.go-version }}
go-version: "1.18.x"
- name: Checkout code
uses: actions/[email protected]
- name: Run tests
run: go test -race ./...
- run: go test -race ./...
- run: go test -fuzz=. -fuzztime=30s
- run: go test -fuzz=Plain -fuzztime=30s ./internal/charset
- run: go test -fuzz=XML -fuzztime=30s ./internal/charset
- run: go test -fuzz=HTML -fuzztime=30s ./internal/charset

coverage:
runs-on: ubuntu-latest
Expand Down
55 changes: 55 additions & 0 deletions internal/charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,58 @@ func TestFromPlain(t *testing.T) {
}
}
}

// FuzzFromPlain fuzzes FromPlain in search of panics. The seed corpus holds
// two raw byte sequences and the UTF-8 encoding of "æøå". Inputs for which
// FromPlain yields an empty charset are skipped.
func FuzzFromPlain(f *testing.F) {
	seeds := [][]byte{
		{0xe6, 0xf8, 0xe5, 0x85, 0x85},
		{0xe6, 0xf8, 0xe5},
		[]byte("æøå"),
	}
	for i := range seeds {
		f.Add(seeds[i])
	}

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromPlain(input) != "" {
			return
		}
		// Nothing was detected for this input; mark the run as skipped.
		t.Skip()
	})
}
// FuzzFromHTML fuzzes FromHTML in search of panics. The seed corpus is a set
// of <meta> tags declaring a charset either directly or through an
// http-equiv content-type, some preceded by an unrelated tag. Inputs for
// which FromHTML yields an empty charset are skipped.
func FuzzFromHTML(f *testing.F) {
	seeds := []string{
		`<meta charset="c">`,
		`<meta charset="щ">`,
		`<meta http-equiv="content-type" content="a/b; charset=c">`,
		`<meta http-equiv="content-type" content="a/b; charset=щ">`,
		`<f 1=2 /><meta charset="c">`,
		`<f a=2><meta http-equiv="content-type" content="a/b; charset=c">`,
		`<f 1=2 /><meta b="b" charset="c">`,
		`<f a=2><meta b="b" http-equiv="content-type" content="a/b; charset=c">`,
	}
	for i := range seeds {
		f.Add([]byte(seeds[i]))
	}

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromHTML(input) != "" {
			return
		}
		// Nothing was detected for this input; mark the run as skipped.
		t.Skip()
	})
}
// FuzzFromXML fuzzes FromXML in search of panics. The seed corpus is a single
// XML declaration carrying an encoding attribute. Inputs for which FromXML
// yields an empty charset are skipped.
func FuzzFromXML(f *testing.F) {
	// Only one seed, so add it directly instead of looping over a slice.
	f.Add([]byte(`<?xml version="1.0" encoding="c"?>`))

	f.Fuzz(func(t *testing.T, input []byte) {
		if FromXML(input) != "" {
			return
		}
		// Nothing was detected for this input; mark the run as skipped.
		t.Skip()
	})
}
39 changes: 39 additions & 0 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"mime"
"os"
Expand Down Expand Up @@ -602,3 +603,41 @@ func TestExtend(t *testing.T) {
})
}
}

// FuzzMimetype runs every detector in the tree (except the root) against
// fuzzed input.
//
// Because fuzz inputs are random, there is no practical way to assert that
// the detection results are correct. The value of this target is in
// surfacing panics inside the detectors.
func FuzzMimetype(f *testing.F) {
	// Upper bound on how many leading bytes of each corpus file are used as
	// a seed.
	const seedLen = 100

	// Some of the more interesting file formats. Most formats are detected by
	// checking some magic numbers in headers, but these have more complicated
	// detection algorithms.
	corpus := []string{
		"testdata/mkv.mkv",
		"testdata/webm.webm",
		"testdata/docx.docx",
		"testdata/pptx.pptx",
		"testdata/xlsx.xlsx",
		"testdata/3gp.3gp",
		"testdata/class.class",
	}
	for _, c := range corpus {
		data, err := ioutil.ReadFile(c)
		if err != nil {
			f.Fatal(err)
		}
		// Clamp instead of slicing unconditionally: data[:seedLen] would
		// panic for a corpus file shorter than seedLen bytes.
		if len(data) > seedLen {
			data = data[:seedLen]
		}
		f.Add(data)
	}

	// First node is root. Remove it because it matches any input.
	detectors := root.flatten()[1:]
	f.Fuzz(func(t *testing.T, data []byte) {
		matched := false
		// Deliberately no early exit: every detector must see the input so
		// that a panic in any of them is found.
		for _, d := range detectors {
			if d.detector(data, math.MaxUint32) {
				matched = true
			}
		}
		if !matched {
			t.Skip()
		}
	})
}