From 7008ceb39c982046de33c65decadaa3449ee4ec2 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Thu, 5 Sep 2024 13:17:14 +0900 Subject: [PATCH 01/12] separate base filter, and introduce max line length limit --- filter/count/count.go | 3 +- filter/cut/cut.go | 3 +- filter/grep.go | 3 +- filter/hash.go | 3 +- filter/head.go | 3 +- filter/jsonarray/jsonarray.go | 3 +- filter/ltsv.go | 3 +- filter/pager/pager.go | 3 +- filter/tail/tail.go | 4 +- {filter => internal/filterbase}/base.go | 42 ++++++++++---------- {filter => internal/filterbase}/base_test.go | 2 +- 11 files changed, 39 insertions(+), 33 deletions(-) rename {filter => internal/filterbase}/base.go (72%) rename {filter => internal/filterbase}/base_test.go (96%) diff --git a/filter/count/count.go b/filter/count/count.go index 8598e9d..92a7f75 100644 --- a/filter/count/count.go +++ b/filter/count/count.go @@ -7,6 +7,7 @@ import ( "strconv" "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -19,7 +20,7 @@ func newCount(r *resource.Resource, p filter.Params) (*resource.Resource, error) } type Count struct { - filter.Base + filterbase.Base n int64 } diff --git a/filter/cut/cut.go b/filter/cut/cut.go index 3aa2a01..e409afc 100644 --- a/filter/cut/cut.go +++ b/filter/cut/cut.go @@ -10,12 +10,13 @@ import ( "strings" "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) // Cut represents cut filter. type Cut struct { - filter.Base + filterbase.Base delim []byte splitter SplitFunc selectors []cutSelector diff --git a/filter/grep.go b/filter/grep.go index 7f82fab..84cdf4b 100644 --- a/filter/grep.go +++ b/filter/grep.go @@ -6,12 +6,13 @@ import ( "io" "regexp" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) // Grep represents grep like filter. type Grep struct { - Base + filterbase.Base currLnum int re *regexp.Regexp diff --git a/filter/hash.go b/filter/hash.go index cbf83eb..db8feef 100644 --- a/filter/hash.go +++ b/filter/hash.go @@ -12,12 +12,13 @@ import ( "io" "strings" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) // Hash represents hash filter. type Hash struct { - Base + filterbase.Base s int h hash.Hash enc hashEncoder diff --git a/filter/head.go b/filter/head.go index 7a00313..4393903 100644 --- a/filter/head.go +++ b/filter/head.go @@ -5,6 +5,7 @@ import ( "bytes" "io" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -26,7 +27,7 @@ func newHead(r *resource.Resource, p Params) (*resource.Resource, error) { // Head is "head" like filter. type Head struct { - Base + filterbase.Base start uint last uint curr uint diff --git a/filter/jsonarray/jsonarray.go b/filter/jsonarray/jsonarray.go index 324f045..a682078 100644 --- a/filter/jsonarray/jsonarray.go +++ b/filter/jsonarray/jsonarray.go @@ -7,6 +7,7 @@ import ( "io" "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -19,7 +20,7 @@ func newFilter(r *resource.Resource, p filter.Params) (*resource.Resource, error } type Filter struct { - filter.Base + filterbase.Base first bool last bool diff --git a/filter/ltsv.go b/filter/ltsv.go index f4ed9a7..4a82592 100644 --- a/filter/ltsv.go +++ b/filter/ltsv.go @@ -6,6 +6,7 @@ import ( "regexp" "strings" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -52,7 +53,7 @@ func (v ltsvValue) put(buf *bytes.Buffer) error { // LTSV represents a structure for LTSV (labeled tab separated value) type LTSV struct { - Base + filterbase.Base label string re *regexp.Regexp match bool diff --git a/filter/pager/pager.go b/filter/pager/pager.go index 477c76d..ddbc4eb 100644 --- a/filter/pager/pager.go +++ b/filter/pager/pager.go @@ -13,11 +13,12 @@ import ( "github.com/koron-go/ringbuf" "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) type Pager struct { - filter.Base + filterbase.Base rx *regexp.Regexp pages []int // sorted positive numbers diff --git a/filter/tail/tail.go b/filter/tail/tail.go index a1a12e8..c26fc76 100644 --- a/filter/tail/tail.go +++ b/filter/tail/tail.go @@ -5,12 +5,12 @@ import ( "io" "github.com/koron-go/ringbuf" - "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" ) // Tail is "tail" like filter. type Tail struct { - filter.Base + filterbase.Base rf bool rb *ringbuf.Buffer[[]byte] diff --git a/filter/base.go b/internal/filterbase/base.go similarity index 72% rename from filter/base.go rename to internal/filterbase/base.go index 5e8bf16..d5353cf 100644 --- a/filter/base.go +++ b/internal/filterbase/base.go @@ -1,9 +1,16 @@ -package filter +package filterbase import ( "bufio" "bytes" + "errors" "io" + + "github.com/koron/nvgd/config" +) + +var ( + ErrMaxLineExceeded = errors.New("maximum line length is exceeded. this limit can be extended with config.filters._base.max_line_len") ) // Base is base of filters. It provides common features for filter. @@ -18,9 +25,19 @@ type Base struct { // BaseReadNext is callback to read next data hunk to buf type BaseReadNext func(buf *bytes.Buffer) error +var Config = struct { + MaxLineLen int `yaml: max_line_len` +}{ + MaxLineLen: 1 * 1024 * 1024, +} + +func init() { + config.RegisterFilter("_base_", &Config) +} + // Init initializes Base object. func (b *Base) Init(r io.ReadCloser, readNext BaseReadNext) { - b.Reader = bufio.NewReader(r) + b.Reader = bufio.NewReaderSize(r, Config.MaxLineLen) b.raw = r b.rn = readNext } @@ -53,26 +70,7 @@ func (b *Base) ReadLine() ([]byte, error) { if err != bufio.ErrBufferFull { return nil, err } - - // for long line - bb := bytes.Buffer{} - if _, err := bb.Write(d); err != nil { - return nil, err - } - for { - b2, err := b.Reader.ReadSlice('\n') - if len(b2) > 0 { - if _, err := bb.Write(b2); err != nil { - return nil, err - } - } - if err == nil || err == io.EOF { - return bb.Bytes(), nil - } - if err != bufio.ErrBufferFull { - return nil, err - } - } + return nil, ErrMaxLineExceeded } // Close closes head filter. diff --git a/filter/base_test.go b/internal/filterbase/base_test.go similarity index 96% rename from filter/base_test.go rename to internal/filterbase/base_test.go index 1609ff0..1b5396a 100644 --- a/filter/base_test.go +++ b/internal/filterbase/base_test.go @@ -1,4 +1,4 @@ -package filter +package filterbase import ( "bytes" From a1158c11c9621a94c7287f0322999c597c149170 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Thu, 5 Sep 2024 16:55:58 +0900 Subject: [PATCH 02/12] tests and fix bugs --- internal/filterbase/base.go | 4 +-- internal/filterbase/base_test.go | 29 +++++++++++++++++++--- internal/filterbase/testdata/maxlen_2M.yml | 3 +++ internal/filterbase/testdata/maxlen_4K.yml | 3 +++ internal/ltsv/reader.go | 21 +++------------- 5 files changed, 38 insertions(+), 22 deletions(-) create mode 100644 internal/filterbase/testdata/maxlen_2M.yml create mode 100644 internal/filterbase/testdata/maxlen_4K.yml diff --git a/internal/filterbase/base.go b/internal/filterbase/base.go index d5353cf..962cb2c 100644 --- a/internal/filterbase/base.go +++ b/internal/filterbase/base.go @@ -10,7 +10,7 @@ import ( ) var ( - ErrMaxLineExceeded = errors.New("maximum line length is exceeded. this limit can be extended with config.filters._base.max_line_len") + ErrMaxLineExceeded = errors.New("maximum line length is exceeded. this limit can be extended with config.filters._base_.max_line_len") ) // Base is base of filters. It provides common features for filter. @@ -26,7 +26,7 @@ type Base struct { type BaseReadNext func(buf *bytes.Buffer) error var Config = struct { - MaxLineLen int `yaml: max_line_len` + MaxLineLen int `yaml:"max_line_len"` }{ MaxLineLen: 1 * 1024 * 1024, } diff --git a/internal/filterbase/base_test.go b/internal/filterbase/base_test.go index 1b5396a..5056a46 100644 --- a/internal/filterbase/base_test.go +++ b/internal/filterbase/base_test.go @@ -1,19 +1,22 @@ -package filterbase +package filterbase_test import ( "bytes" "io" "testing" + + "github.com/koron/nvgd/config" + "github.com/koron/nvgd/internal/filterbase" ) -func TestBaseReadLine(t *testing.T) { +func TestReadLine(t *testing.T) { buf := make([]byte, 16384) for i := range buf { buf[i] = 'a' } buf[len(buf)-1] = '\n' - b := &Base{} + b := &filterbase.Base{} b.Init(io.NopCloser(bytes.NewReader(buf)), nil) d, err := b.ReadLine() @@ -31,4 +34,24 @@ func TestBaseReadLine(t *testing.T) { t.Fatalf("at %d not match: %c != %c", i, d[i], buf[i]) } } + + if err := b.Close(); err != nil { + t.Errorf("failed to close: %s", err) + } +} + +func TestConfigMaxLineLen(t *testing.T) { + if _, err := config.LoadConfig("testdata/maxlen_4K.yml"); err != nil { + t.Fatal(err) + } + if got, want := filterbase.Config.MaxLineLen, 4096; got != want { + t.Errorf("incorrect max_line_len: want=%d got=%d", want, got) + } + + if _, err := config.LoadConfig("testdata/maxlen_2M.yml"); err != nil { + t.Fatal(err) + } + if got, want := filterbase.Config.MaxLineLen, 2097152; got != want { + t.Errorf("incorrect max_line_len: want=%d got=%d", want, got) + } } diff --git a/internal/filterbase/testdata/maxlen_2M.yml b/internal/filterbase/testdata/maxlen_2M.yml new file mode 100644 index 0000000..85a18d9 --- /dev/null +++ b/internal/filterbase/testdata/maxlen_2M.yml @@ -0,0 +1,3 @@ +filters: + _base_: + max_line_len: 2097152 diff --git a/internal/filterbase/testdata/maxlen_4K.yml b/internal/filterbase/testdata/maxlen_4K.yml new file mode 100644 index 0000000..7378061 --- /dev/null +++ b/internal/filterbase/testdata/maxlen_4K.yml @@ -0,0 +1,3 @@ +filters: + _base_: + max_line_len: 4096 diff --git a/internal/ltsv/reader.go b/internal/ltsv/reader.go index 2473797..3d704cd 100644 --- a/internal/ltsv/reader.go +++ b/internal/ltsv/reader.go @@ -4,6 +4,8 @@ import ( "bufio" "bytes" "io" + + "github.com/koron/nvgd/internal/filterbase" ) // Reader reads LTSV values. @@ -14,7 +16,7 @@ type Reader struct { // NewReader creates a new LTSV reader. func NewReader(r io.Reader) *Reader { return &Reader{ - rd: bufio.NewReader(r), + rd: bufio.NewReaderSize(r, filterbase.Config.MaxLineLen), } } @@ -28,22 +30,7 @@ func (r *Reader) readLine() ([]byte, error) { } else if err != bufio.ErrBufferFull { return nil, err } - // Read the rest of the lines that have gone beyond the default buffer into - // another, larger buffer. - bb := bytes.NewBuffer(make([]byte, 0, 8192)) - bb.Write(d) // bytes.Buffer.Writer never be failed. - for { - d2, err := r.rd.ReadSlice('\n') - if len(d2) > 0 { - bb.Write(d2) // bytes.Buffer.Writer never be failed. - } - if err == nil || err == io.EOF { - return bb.Bytes(), nil - } - if err != bufio.ErrBufferFull { - return nil, err - } - } + return nil, filterbase.ErrMaxLineExceeded } // Read read a LTSV value. From 1f73b3e2bd510411639251db4191ed8392144eeb Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Thu, 5 Sep 2024 17:33:51 +0900 Subject: [PATCH 03/12] split LTSV reader usages, filter and others --- core/alias.go | 2 +- filter/htmltable/htmltable.go | 6 ++++-- filter/indexhtml/indexhtml.go | 4 ++-- filter/texttable/texttable.go | 3 ++- internal/filterbase/ltsv_reader.go | 25 +++++++++++++++++++++++++ internal/ltsv/reader.go | 17 ++++++++--------- internal/ltsv/reader_test.go | 4 ++-- 7 files changed, 44 insertions(+), 17 deletions(-) create mode 100644 internal/filterbase/ltsv_reader.go diff --git a/core/alias.go b/core/alias.go index a1be1b9..494424e 100644 --- a/core/alias.go +++ b/core/alias.go @@ -27,7 +27,7 @@ func (a alias) rewritePath(src string) string { func (a alias) rewriteLTSV(src *resource.Resource) (*resource.Resource, error) { buf := &bytes.Buffer{} - lr := ltsv.NewReader(src) + lr := ltsv.NewReaderSize(src, 4096) for { s, err := lr.Read() if err != nil { diff --git a/filter/htmltable/htmltable.go b/filter/htmltable/htmltable.go index 77b0ae1..9117344 100644 --- a/filter/htmltable/htmltable.go +++ b/filter/htmltable/htmltable.go @@ -3,6 +3,7 @@ package htmltable import ( "bytes" + "errors" "html/template" "io" "strings" @@ -11,6 +12,7 @@ import ( "github.com/koron/nvgd/config" "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/commonconst" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/internal/ltsv" "github.com/koron/nvgd/resource" ) @@ -158,13 +160,13 @@ func filterFunc(r *resource.Resource, p filter.Params) (*resource.Resource, erro Linefeed: p.Bool(commonconst.Linefeed, false), Config: &cfg, } - lr := ltsv.NewReader(r) + lr := filterbase.NewLTSVReader(r) first := true for { s, err := lr.Read() if err != nil { r.Close() - if err != io.EOF { + if !errors.Is(err, io.EOF) { return nil, err } break diff --git a/filter/indexhtml/indexhtml.go b/filter/indexhtml/indexhtml.go index 5df1c9c..2ec1f57 100644 --- a/filter/indexhtml/indexhtml.go +++ b/filter/indexhtml/indexhtml.go @@ -14,7 +14,7 @@ import ( "github.com/koron/nvgd/config" "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/commonconst" - "github.com/koron/nvgd/internal/ltsv" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -111,7 +111,7 @@ func filterFunc(r *resource.Resource, p filter.Params) (*resource.Resource, erro d := &doc{ Config: &cfg, } - lr := ltsv.NewReader(r) + lr := filterbase.NewLTSVReader(r) for { s, err := lr.Read() if err != nil { diff --git a/filter/texttable/texttable.go b/filter/texttable/texttable.go index cd5d985..e73ea1c 100644 --- a/filter/texttable/texttable.go +++ b/filter/texttable/texttable.go @@ -6,6 +6,7 @@ import ( "io" "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/internal/ltsv" "github.com/koron/nvgd/resource" "github.com/olekukonko/tablewriter" @@ -70,7 +71,7 @@ type row struct { func filterFunc(r *resource.Resource, p filter.Params) (*resource.Resource, error) { // compose document. d := &doc{} - lr := ltsv.NewReader(r) + lr := filterbase.NewLTSVReader(r) first := true for { s, err := lr.Read() diff --git a/internal/filterbase/ltsv_reader.go b/internal/filterbase/ltsv_reader.go new file mode 100644 index 0000000..870400e --- /dev/null +++ b/internal/filterbase/ltsv_reader.go @@ -0,0 +1,25 @@ +package filterbase + +import ( + "bufio" + "errors" + "io" + + "github.com/koron/nvgd/internal/ltsv" +) + +type LTSVReader struct { + r ltsv.Reader +} + +func NewLTSVReader(r io.Reader) *ltsv.Reader { + return ltsv.NewReaderSize(r, Config.MaxLineLen) +} + +func (r *LTSVReader) Read() (*ltsv.Set, error) { + set, err := r.r.Read() + if errors.Is(err, bufio.ErrBufferFull) { + return nil, ErrMaxLineExceeded + } + return set, nil +} diff --git a/internal/ltsv/reader.go b/internal/ltsv/reader.go index 3d704cd..a964bf9 100644 --- a/internal/ltsv/reader.go +++ b/internal/ltsv/reader.go @@ -4,8 +4,6 @@ import ( "bufio" "bytes" "io" - - "github.com/koron/nvgd/internal/filterbase" ) // Reader reads LTSV values. @@ -16,21 +14,22 @@ type Reader struct { // NewReader creates a new LTSV reader. func NewReader(r io.Reader) *Reader { return &Reader{ - rd: bufio.NewReaderSize(r, filterbase.Config.MaxLineLen), + rd: bufio.NewReader(r), + } +} + +func NewReaderSize(r io.Reader, maxLineLen int) *Reader { + return &Reader{ + rd: bufio.NewReaderSize(r, maxLineLen), } } func (r *Reader) readLine() ([]byte, error) { - // FIXME: Avoids loading a file with very long line and allocating - // excessive memory. It would be better to use a larger buffer from the - // beginning, nor never extending . d, err := r.rd.ReadSlice('\n') if err == nil || (err == io.EOF && len(d) > 0) { return d, nil - } else if err != bufio.ErrBufferFull { - return nil, err } - return nil, filterbase.ErrMaxLineExceeded + return nil, err } // Read read a LTSV value. diff --git a/internal/ltsv/reader_test.go b/internal/ltsv/reader_test.go index ce148f1..45adcb1 100644 --- a/internal/ltsv/reader_test.go +++ b/internal/ltsv/reader_test.go @@ -103,7 +103,7 @@ func TestLongLine(t *testing.T) { t.Fatal(err) } defer f.Close() - r := NewReader(f) + r := NewReaderSize(f, 1*1024*1024) set, err := r.Read() if err != nil { t.Fatal(err) @@ -138,7 +138,7 @@ func TestLongLine2(t *testing.T) { t.Fatal(err) } defer f.Close() - r := NewReader(f) + r := NewReaderSize(f, 1*1024*1024) set, err := r.Read() if err != nil { t.Fatal(err) From 2f0d3b84e5a62f119d0fc9c7f6544cea12ae2f6f Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 00:26:09 +0900 Subject: [PATCH 04/12] split filterbase.Base.ReadLine to filterbase.LineReader --- filter/count/count.go | 10 ++++++--- filter/count/count_test.go | 14 +++++++++++++ filter/cut/cut.go | 4 +++- filter/grep.go | 14 +++++++------ filter/jsonarray/jsonarray.go | 8 ++++++-- filter/jsonarray/jsonarray_test.go | 20 ++++++++++++++++++ filter/ltsv.go | 23 +++++++++++++-------- filter/ltsv_test.go | 12 +++++++++++ filter/pager/pager.go | 4 +++- filter/tail/tail.go | 6 ++++-- internal/filterbase/base.go | 14 ------------- internal/filterbase/base_test.go | 33 ------------------------------ internal/filterbase/line_reader.go | 30 +++++++++++++++++++++++++++ 13 files changed, 121 insertions(+), 71 deletions(-) create mode 100644 filter/count/count_test.go create mode 100644 filter/jsonarray/jsonarray_test.go create mode 100644 filter/ltsv_test.go create mode 100644 internal/filterbase/line_reader.go diff --git a/filter/count/count.go b/filter/count/count.go index 92a7f75..86ed265 100644 --- a/filter/count/count.go +++ b/filter/count/count.go @@ -21,11 +21,15 @@ func newCount(r *resource.Resource, p filter.Params) (*resource.Resource, error) type Count struct { filterbase.Base - n int64 + reader *filterbase.LineReader + n int64 } func New(r io.ReadCloser) *Count { - c := &Count{n: -1} + c := &Count{ + reader: filterbase.NewLineReader(r), + n: -1, + } c.Base.Init(r, c.readNext) return c } @@ -36,7 +40,7 @@ func (c *Count) readNext(buf *bytes.Buffer) error { } c.n = 0 for { - raw, err := c.ReadLine() + raw, err := c.reader.ReadLine() if err != nil && len(raw) == 0 { if err == io.EOF { if _, err := buf.WriteString(strconv.FormatInt(c.n, 10)); err != nil { diff --git a/filter/count/count_test.go b/filter/count/count_test.go new file mode 100644 index 0000000..9250ea8 --- /dev/null +++ b/filter/count/count_test.go @@ -0,0 +1,14 @@ +package count + +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) + +func TestCount(t *testing.T) { + filtertest.Check(t, newCount, filter.Params{}, "a\nb\nc\nd\n", "4") + filtertest.Check(t, newCount, filter.Params{}, "a\nb\nc\nd", "4") + filtertest.Check(t, newCount, filter.Params{}, "a\nb\nc\n", "3") +} diff --git a/filter/cut/cut.go b/filter/cut/cut.go index e409afc..0f914f6 100644 --- a/filter/cut/cut.go +++ b/filter/cut/cut.go @@ -17,6 +17,7 @@ import ( // Cut represents cut filter. type Cut struct { filterbase.Base + reader *filterbase.LineReader delim []byte splitter SplitFunc selectors []cutSelector @@ -31,6 +32,7 @@ type cutWriter func(io.Writer, []byte) error // NewCut creates an instance of cut filter. func NewCut(r io.ReadCloser, delim []byte, selectors []cutSelector, splitFunc SplitFunc) *Cut { f := &Cut{ + reader: filterbase.NewLineReader(r), delim: delim, splitter: splitFunc, } @@ -50,7 +52,7 @@ func NewCut(r io.ReadCloser, delim []byte, selectors []cutSelector, splitFunc Sp } func (f *Cut) readNext(buf *bytes.Buffer) error { - raw, err := f.ReadLine() + raw, err := f.reader.ReadLine() if err != nil && (err != io.EOF || len(raw) == 0) { return err } diff --git a/filter/grep.go b/filter/grep.go index 84cdf4b..269e217 100644 --- a/filter/grep.go +++ b/filter/grep.go @@ -13,6 +13,7 @@ import ( // Grep represents grep like filter. type Grep struct { filterbase.Base + reader *filterbase.LineReader currLnum int re *regexp.Regexp @@ -28,11 +29,12 @@ type Grep struct { // NewGrep creates an instance of grep filter. func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf LineFilter, lnum bool, cnum int) *Grep { g := &Grep{ - re: re, - match: match, - lf: TrimEOL.Chain(lf), - lnum: lnum, - cnum: cnum, + reader: filterbase.NewLineReader(r), + re: re, + match: match, + lf: TrimEOL.Chain(lf), + lnum: lnum, + cnum: cnum, } if cnum > 0 { g.contextBefore = make([][]byte, 0, cnum) @@ -43,7 +45,7 @@ func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf LineFilter, lnum func (g *Grep) readNext(buf *bytes.Buffer) error { for { - raw, err := g.ReadLine() + raw, err := g.reader.ReadLine() if err != nil && len(raw) == 0 { return err } diff --git a/filter/jsonarray/jsonarray.go b/filter/jsonarray/jsonarray.go index a682078..96cc6e1 100644 --- a/filter/jsonarray/jsonarray.go +++ b/filter/jsonarray/jsonarray.go @@ -1,4 +1,5 @@ // Package jsonarray provides JSON array filter. +// The json array filter converts string arrays to JSON array. package jsonarray import ( @@ -21,13 +22,16 @@ func newFilter(r *resource.Resource, p filter.Params) (*resource.Resource, error type Filter struct { filterbase.Base + reader *filterbase.LineReader first bool last bool } func New(r io.ReadCloser) *Filter { - f := &Filter{} + f := &Filter{ + reader: filterbase.NewLineReader(r), + } f.Base.Init(r, f.readNext) return f } @@ -40,7 +44,7 @@ func (f *Filter) readNext(buf *bytes.Buffer) error { buf.WriteByte('[') f.first = true } - b, err0 := f.Base.ReadLine() + b, err0 := f.reader.ReadLine() if n := len(b); n > 0 && b[n-1] == '\n' { b = b[:n-1] } diff --git a/filter/jsonarray/jsonarray_test.go b/filter/jsonarray/jsonarray_test.go new file mode 100644 index 0000000..d8f8a88 --- /dev/null +++ b/filter/jsonarray/jsonarray_test.go @@ -0,0 +1,20 @@ +package jsonarray + +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) + +func TestJSONArray(t *testing.T) { + filtertest.Check(t, newFilter, filter.Params{}, "aaa\nbbb\nccc", `["aaa", +"bbb", +"ccc"] +`) + filtertest.Check(t, newFilter, filter.Params{}, "aaa\nbbb\nccc\n", `["aaa", +"bbb", +"ccc", +""] +`) +} diff --git a/filter/ltsv.go b/filter/ltsv.go index 4a82592..36670c1 100644 --- a/filter/ltsv.go +++ b/filter/ltsv.go @@ -54,19 +54,21 @@ func (v ltsvValue) put(buf *bytes.Buffer) error { // LTSV represents a structure for LTSV (labeled tab separated value) type LTSV struct { filterbase.Base - label string - re *regexp.Regexp - match bool - cut []string + reader *filterbase.LineReader + label string + re *regexp.Regexp + match bool + cut []string } // NewLTSV creates a new instance of LTSV. func NewLTSV(r io.ReadCloser, label string, re *regexp.Regexp, match bool, cut []string) *LTSV { l := <SV{ - label: label, - re: re, - match: match, - cut: cut, + label: label, + reader: filterbase.NewLineReader(r), + re: re, + match: match, + cut: cut, } l.Base.Init(r, l.readNext) return l @@ -74,10 +76,13 @@ func NewLTSV(r io.ReadCloser, label string, re *regexp.Regexp, match bool, cut [ func (l *LTSV) readNext(buf *bytes.Buffer) error { for { - b, err := l.ReadLine() + b, err := l.reader.ReadLine() if err != nil { return err } + if last := len(b) - 1; last > 0 && b[last] == '\n' { + b = b[0:last] + } v := parseLTSV(string(b)) if !l.isMatch(v) { continue diff --git a/filter/ltsv_test.go b/filter/ltsv_test.go new file mode 100644 index 0000000..56c6d61 --- /dev/null +++ b/filter/ltsv_test.go @@ -0,0 +1,12 @@ +package filter + +import "testing" + +func TestLTSVFilter(t *testing.T) { + // TODO: fix LTSV filter. + // internal/ltsvを使うように変更し、その他のテストも書く + t.Skip("invalid just for now") + checkFilter(t, newLTSV, Params{}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n") +} diff --git a/filter/pager/pager.go b/filter/pager/pager.go index ddbc4eb..2b0fcc4 100644 --- a/filter/pager/pager.go +++ b/filter/pager/pager.go @@ -19,6 +19,7 @@ import ( type Pager struct { filterbase.Base + reader *filterbase.LineReader rx *regexp.Regexp pages []int // sorted positive numbers @@ -35,6 +36,7 @@ type Pager struct { func NewPager(r io.ReadCloser, rx *regexp.Regexp, pages, lasts []int, showNum bool) *Pager { f := &Pager{ + reader: filterbase.NewLineReader(r), rx: rx, pages: pages, lasts: lasts, @@ -50,7 +52,7 @@ func NewPager(r io.ReadCloser, rx *regexp.Regexp, pages, lasts []int, showNum bo func (f *Pager) readNext(buf *bytes.Buffer) error { for { - line, err := f.ReadLine() + line, err := f.reader.ReadLine() if err != nil { if !errors.Is(err, io.EOF) { return err diff --git a/filter/tail/tail.go b/filter/tail/tail.go index c26fc76..fa62946 100644 --- a/filter/tail/tail.go +++ b/filter/tail/tail.go @@ -11,6 +11,7 @@ import ( // Tail is "tail" like filter. type Tail struct { filterbase.Base + reader *filterbase.LineReader rf bool rb *ringbuf.Buffer[[]byte] @@ -22,7 +23,8 @@ func NewTail(r io.ReadCloser, limit int) *Tail { limit = 10 } t := &Tail{ - rb: ringbuf.New[[]byte](limit), + reader: filterbase.NewLineReader(r), + rb: ringbuf.New[[]byte](limit), } t.Base.Init(r, t.readNext) return t @@ -52,7 +54,7 @@ func (t *Tail) readNext(buf *bytes.Buffer) error { func (t *Tail) readAll() error { for { - b, err := t.ReadLine() + b, err := t.reader.ReadLine() if err == io.EOF { if len(b) > 0 { t.rb.Enqueue(b) diff --git a/internal/filterbase/base.go b/internal/filterbase/base.go index 962cb2c..5fe40d6 100644 --- a/internal/filterbase/base.go +++ b/internal/filterbase/base.go @@ -59,20 +59,6 @@ func (b *Base) Read(buf []byte) (int, error) { return b.buf.Read(buf) } -// ReadLine reads a line as []byte. -func (b *Base) ReadLine() ([]byte, error) { - d, err := b.Reader.ReadSlice('\n') - if err == nil || err == io.EOF { - bb := make([]byte, len(d)) - copy(bb, d) - return bb, err - } - if err != bufio.ErrBufferFull { - return nil, err - } - return nil, ErrMaxLineExceeded -} - // Close closes head filter. func (b *Base) Close() error { if b.closed { diff --git a/internal/filterbase/base_test.go b/internal/filterbase/base_test.go index 5056a46..2eb6f85 100644 --- a/internal/filterbase/base_test.go +++ b/internal/filterbase/base_test.go @@ -1,45 +1,12 @@ package filterbase_test import ( - "bytes" - "io" "testing" "github.com/koron/nvgd/config" "github.com/koron/nvgd/internal/filterbase" ) -func TestReadLine(t *testing.T) { - buf := make([]byte, 16384) - for i := range buf { - buf[i] = 'a' - } - buf[len(buf)-1] = '\n' - - b := &filterbase.Base{} - b.Init(io.NopCloser(bytes.NewReader(buf)), nil) - - d, err := b.ReadLine() - if err != nil { - t.Fatalf("ReadLine failed: %s", err) - } - if len(d) != 16384 { - t.Fatalf("unexpected length: %d", len(d)) - } - if len(d) != len(buf) { - t.Fatalf("length not match: %d != %d", len(d), len(buf)) - } - for i := range d { - if d[i] != buf[i] { - t.Fatalf("at %d not match: %c != %c", i, d[i], buf[i]) - } - } - - if err := b.Close(); err != nil { - t.Errorf("failed to close: %s", err) - } -} - func TestConfigMaxLineLen(t *testing.T) { if _, err := config.LoadConfig("testdata/maxlen_4K.yml"); err != nil { t.Fatal(err) diff --git a/internal/filterbase/line_reader.go b/internal/filterbase/line_reader.go new file mode 100644 index 0000000..5795dfe --- /dev/null +++ b/internal/filterbase/line_reader.go @@ -0,0 +1,30 @@ +package filterbase + +import ( + "bufio" + "errors" + "io" +) + +type LineReader struct { + buf *bufio.Reader +} + +func NewLineReader(r io.Reader) *LineReader { + return &LineReader{ + buf: bufio.NewReaderSize(r, Config.MaxLineLen), + } +} + +func (r *LineReader) ReadLine() ([]byte, error) { + b, err := r.buf.ReadSlice('\n') + if err == nil || errors.Is(err, io.EOF) { + bb := make([]byte, len(b)) + copy(bb, b) + return bb, err + } + if errors.Is(err, bufio.ErrBufferFull) { + return nil, ErrMaxLineExceeded + } + return nil, err +} From 447e1100b3bd6bd07c6ad50c375db5fdcd23795b Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 00:36:01 +0900 Subject: [PATCH 05/12] curve out grep filter to independent package --- filter/filter_test.go | 28 ++++++++++++++++++ filter/{ => grep}/grep.go | 17 +++++------ filter/grep/grep_test.go | 48 +++++++++++++++++++++++++++++++ filter/grep_test.go | 60 --------------------------------------- 4 files changed, 85 insertions(+), 68 deletions(-) create mode 100644 filter/filter_test.go rename filter/{ => grep}/grep.go (86%) create mode 100644 filter/grep/grep_test.go delete mode 100644 filter/grep_test.go diff --git a/filter/filter_test.go b/filter/filter_test.go new file mode 100644 index 0000000..2693e94 --- /dev/null +++ b/filter/filter_test.go @@ -0,0 +1,28 @@ +package filter + +import ( + "io" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/koron/nvgd/resource" +) + +func checkFilter(t *testing.T, f Factory, p Params, in, want string) { + t.Helper() + src := resource.NewString(in) + filter, err := f(src, p) + if err != nil { + t.Errorf("failed to create a filter: %s", err) + return + } + b, err := io.ReadAll(filter) + if err != nil { + t.Errorf("failed to read from filter: %s", err) + return + } + got := string(b) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("unexpected output from filter: -want +got\n%s", diff) + } +} diff --git a/filter/grep.go b/filter/grep/grep.go similarity index 86% rename from filter/grep.go rename to filter/grep/grep.go index 269e217..0721ec2 100644 --- a/filter/grep.go +++ b/filter/grep/grep.go @@ -1,4 +1,4 @@ -package filter +package grep import ( "bytes" @@ -6,6 +6,7 @@ import ( "io" "regexp" + "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -18,7 +19,7 @@ type Grep struct { re *regexp.Regexp match bool - lf LineFilter + lf filter.LineFilter lnum bool cnum int @@ -27,12 +28,12 @@ type Grep struct { } // NewGrep creates an instance of grep filter. -func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf LineFilter, lnum bool, cnum int) *Grep { +func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf filter.LineFilter, lnum bool, cnum int) *Grep { g := &Grep{ reader: filterbase.NewLineReader(r), re: re, match: match, - lf: TrimEOL.Chain(lf), + lf: filter.TrimEOL.Chain(lf), lnum: lnum, cnum: cnum, } @@ -90,7 +91,7 @@ func (g *Grep) output(buf *bytes.Buffer, lnum int, data []byte) error { return err } -func newGrep(r *resource.Resource, p Params) (*resource.Resource, error) { +func newGrep(r *resource.Resource, p filter.Params) (*resource.Resource, error) { re, err := regexp.Compile(p.String("re", "")) if err != nil { return nil, err @@ -98,18 +99,18 @@ func newGrep(r *resource.Resource, p Params) (*resource.Resource, error) { match := p.Bool("match", true) lnum := p.Bool("number", false) cnum := p.Int("context", 0) - var lf LineFilter + var lf filter.LineFilter // field filter var ( field = p.Int("field", 0) delim = []byte(p.String("delim", "\t")) ) if field > 0 { - lf = lf.Chain(NewCutLF(delim, field-1)) + lf = lf.Chain(filter.NewCutLF(delim, field-1)) } return r.Wrap(NewGrep(r, re, match, lf, lnum, cnum)), nil } func init() { - MustRegister("grep", newGrep) + filter.MustRegister("grep", newGrep) } diff --git a/filter/grep/grep_test.go b/filter/grep/grep_test.go new file mode 100644 index 0000000..853e6ad --- /dev/null +++ b/filter/grep/grep_test.go @@ -0,0 +1,48 @@ +package grep + +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) + +func TestGrep(t *testing.T) { + filtertest.Check(t, newGrep, + filter.Params{"re": "foo"}, "aaa\nfoo\nbbb\n", "foo\n") + + filtertest.Check(t, newGrep, + filter.Params{"re": "foo", "number": "true"}, + "aaa\nfoo\nbbb\n", + "2: foo\n") + filtertest.Check(t, newGrep, + filter.Params{"re": "foo", "number": "true"}, + "aaa\nfoo\nbbb\nfoo\nccc\n", + "2: foo\n4: foo\n") +} + +func TestGrepContext(t *testing.T) { + filtertest.Check(t, newGrep, + filter.Params{"re": "eee", "context": "1"}, + "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", + "ddd\neee\nfff\n") + filtertest.Check(t, newGrep, + filter.Params{"re": "eee", "context": "2"}, + "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", + "ccc\nddd\neee\nfff\nggg\n") + + filtertest.Check(t, newGrep, + filter.Params{"re": "eee", "context": "1", "number": "true"}, + "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", + "4: ddd\n5: eee\n6: fff\n") + + filtertest.Check(t, newGrep, + filter.Params{"re": "eee", "context": "2", "number": "true"}, + "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", + "3: ccc\n4: ddd\n5: eee\n6: fff\n7: ggg\n") + + filtertest.Check(t, newGrep, + filter.Params{"re": "XXX", "context": "2"}, + "aaa\nbbb\nccc\nXXX\neee\nXXX\nggg\nhhh\niii\n", + "bbb\nccc\nXXX\neee\nXXX\nggg\nhhh\n") +} diff --git a/filter/grep_test.go b/filter/grep_test.go deleted file mode 100644 index cad9147..0000000 --- a/filter/grep_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package filter - -import ( - "io" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/koron/nvgd/resource" -) - -func checkFilter(t *testing.T, f Factory, p Params, in, want string) { - t.Helper() - src := resource.NewString(in) - filter, err := f(src, p) - if err != nil { - t.Errorf("failed to create a filter: %s", err) - return - } - b, err := io.ReadAll(filter) - if err != nil { - t.Errorf("failed to read from filter: %s", err) - return - } - got := string(b) - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("unexpected output from filter: -want +got\n%s", diff) - } -} - -func TestGrep(t *testing.T) { - checkFilter(t, newGrep, Params{"re": "foo"}, "aaa\nfoo\nbbb\n", "foo\n") - - checkFilter(t, newGrep, Params{"re": "foo", "number": "true"}, - "aaa\nfoo\nbbb\n", - "2: foo\n") - checkFilter(t, newGrep, Params{"re": "foo", "number": "true"}, - "aaa\nfoo\nbbb\nfoo\nccc\n", - "2: foo\n4: foo\n") -} - -func TestGrepContext(t *testing.T) { - checkFilter(t, newGrep, Params{"re": "eee", "context": "1"}, - "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", - "ddd\neee\nfff\n") - checkFilter(t, newGrep, Params{"re": "eee", "context": "2"}, - "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", - "ccc\nddd\neee\nfff\nggg\n") - - checkFilter(t, newGrep, Params{"re": "eee", "context": "1", "number": "true"}, - "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", - "4: ddd\n5: eee\n6: fff\n") - - checkFilter(t, newGrep, Params{"re": "eee", "context": "2", "number": "true"}, - "aaa\nbbb\nccc\nddd\neee\nfff\nggg\nhhh\niii\n", - "3: ccc\n4: ddd\n5: eee\n6: fff\n7: ggg\n") - - checkFilter(t, newGrep, Params{"re": "XXX", "context": "2"}, - "aaa\nbbb\nccc\nXXX\neee\nXXX\nggg\nhhh\niii\n", - "bbb\nccc\nXXX\neee\nXXX\nggg\nhhh\n") -} From b02a8a76e6ca71eec7d684a1a2db91af1944ee7e Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 00:46:36 +0900 Subject: [PATCH 06/12] curve out head filter to independent package --- filter/{ => head}/head.go | 21 ++++++++++++--------- filter/head/head_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) rename filter/{ => head}/head.go (72%) create mode 100644 filter/head/head_test.go diff --git a/filter/head.go b/filter/head/head.go similarity index 72% rename from filter/head.go rename to filter/head/head.go index 4393903..b679f9d 100644 --- a/filter/head.go +++ b/filter/head/head.go @@ -1,19 +1,20 @@ -package filter +package head import ( "bufio" "bytes" "io" + "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) func init() { - MustRegister("head", newHead) + filter.MustRegister("head", newHead) } -func newHead(r *resource.Resource, p Params) (*resource.Resource, error) { +func newHead(r *resource.Resource, p filter.Params) (*resource.Resource, error) { start := p.Int("start", 0) if start < 0 { start = 0 @@ -28,9 +29,10 @@ func newHead(r *resource.Resource, p Params) (*resource.Resource, error) { // Head is "head" like filter. type Head struct { filterbase.Base - start uint - last uint - curr uint + reader *filterbase.LineReader + start uint + last uint + curr uint } var ( @@ -40,8 +42,9 @@ var ( // NewHead creates an instance of head filter. func NewHead(r io.ReadCloser, start, limit uint) *Head { h := &Head{ - start: start, - last: start + limit, + reader: filterbase.NewLineReader(r), + start: start, + last: start + limit, } h.Base.Init(r, h.readNext) return h @@ -50,7 +53,7 @@ func NewHead(r io.ReadCloser, start, limit uint) *Head { func (h *Head) readNext(buf *bytes.Buffer) error { for h.curr < h.last { lnum := h.curr - b, err := h.Reader.ReadSlice('\n') + b, err := h.reader.ReadLine() if err != nil { if err != bufio.ErrBufferFull { return err diff --git a/filter/head/head_test.go b/filter/head/head_test.go new file mode 100644 index 0000000..649def4 --- /dev/null +++ b/filter/head/head_test.go @@ -0,0 +1,28 @@ +package head + +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) + +func TestHead(t *testing.T) { + filtertest.Check(t, newHead, + filter.Params{}, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n", + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n") + filtertest.Check(t, newHead, + filter.Params{ + "start": "3", + }, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n", + "3\n4\n5\n6\n7\n8\n9\n") + filtertest.Check(t, newHead, + filter.Params{ + "start": "3", + "limit": "5", + }, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n", + "3\n4\n5\n6\n7\n") +} From f3eb0073ac8ba8ebfdc034db53e79fbcc9ab4c38 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 00:54:56 +0900 Subject: [PATCH 07/12] curve out hash filter to independent package --- filter/{ => hash}/hash.go | 7 ++++--- filter/hash/hash_test.go | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) rename filter/{ => hash}/hash.go (92%) create mode 100644 filter/hash/hash_test.go diff --git a/filter/hash.go b/filter/hash/hash.go similarity index 92% rename from filter/hash.go rename to filter/hash/hash.go index db8feef..684b81b 100644 --- a/filter/hash.go +++ b/filter/hash/hash.go @@ -1,4 +1,4 @@ -package filter +package hash import ( "bytes" @@ -12,6 +12,7 @@ import ( "io" "strings" + "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -103,7 +104,7 @@ func hashEncBin(w io.Writer, b []byte) error { return err } -func newHash(r *resource.Resource, p Params) (*resource.Resource, error) { +func newHash(r *resource.Resource, p filter.Params) (*resource.Resource, error) { h, err := toHash(p.String("algorithm", "md5")) if err != nil { return nil, err @@ -116,5 +117,5 @@ func newHash(r *resource.Resource, p Params) (*resource.Resource, error) { } func init() { - MustRegister("hash", newHash) + filter.MustRegister("hash", newHash) } diff --git a/filter/hash/hash_test.go b/filter/hash/hash_test.go new file mode 100644 index 0000000..8c1265e --- /dev/null +++ b/filter/hash/hash_test.go @@ -0,0 +1,42 @@ +package hash + +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) + +func TestHashFilter(t *testing.T) { + // encoding variations + filtertest.Check(t, newHash, + filter.Params{}, + "", + "d41d8cd98f00b204e9800998ecf8427e") + filtertest.Check(t, newHash, + filter.Params{ "encoding": "base64", }, + "", + "1B2M2Y8AsgTpgAmY7PhCfg==") + filtertest.Check(t, newHash, + filter.Params{ "encoding": "binary", }, + "", + "\xd4\x1d\x8cُ\x00\xb2\x04\xe9\x80\t\x98\xec\xf8B~") + + // algorithm variations + filtertest.Check(t, newHash, + filter.Params{ "algorithm": "sha1"}, + "", + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + filtertest.Check(t, newHash, + filter.Params{ "algorithm": "sha1"}, + "", + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + filtertest.Check(t, newHash, + filter.Params{ "algorithm": "sha256"}, + "", + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") + filtertest.Check(t, newHash, + filter.Params{ "algorithm": "sha512"}, + "", + "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e") +} From 73ccb7cd06c84311111ba2053c4cb602c6f69552 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 00:58:17 +0900 Subject: [PATCH 08/12] curve out ltsv filter to independent package --- filter/filter_test.go | 28 ---------------------------- filter/{ => ltsv}/ltsv.go | 11 ++++++----- filter/{ => ltsv}/ltsv_test.go | 11 ++++++++--- 3 files changed, 14 insertions(+), 36 deletions(-) delete mode 100644 filter/filter_test.go rename filter/{ => ltsv}/ltsv.go (90%) rename filter/{ => ltsv}/ltsv_test.go (66%) diff --git a/filter/filter_test.go b/filter/filter_test.go deleted file mode 100644 index 2693e94..0000000 --- a/filter/filter_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package filter - -import ( - "io" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/koron/nvgd/resource" -) - -func checkFilter(t *testing.T, f Factory, p Params, in, want string) { - t.Helper() - src := resource.NewString(in) - filter, err := f(src, p) - if err != nil { - t.Errorf("failed to create a filter: %s", err) - return - } - b, err := io.ReadAll(filter) - if err != nil { - t.Errorf("failed to read from filter: %s", err) - return - } - got := string(b) - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("unexpected output from filter: -want +got\n%s", diff) - } -} diff --git a/filter/ltsv.go b/filter/ltsv/ltsv.go similarity index 90% rename from filter/ltsv.go rename to filter/ltsv/ltsv.go index 36670c1..b557ac7 100644 --- a/filter/ltsv.go +++ b/filter/ltsv/ltsv.go @@ -1,4 +1,4 @@ -package filter +package ltsv import ( "bytes" @@ -6,6 +6,7 @@ import ( "regexp" "strings" + "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/filterbase" "github.com/koron/nvgd/resource" ) @@ -127,7 +128,7 @@ func (l *LTSV) filter(v ltsvValue) ltsvValue { return r } -func newLTSV(r *resource.Resource, p Params) (*resource.Resource, error) { +func newLTSV(r *resource.Resource, p filter.Params) (*resource.Resource, error) { label, re, err := parseGrep(p) if err != nil { return nil, err @@ -137,7 +138,7 @@ func newLTSV(r *resource.Resource, p Params) (*resource.Resource, error) { return r.Wrap(NewLTSV(r, label, re, match, cut)), nil } -func parseGrep(p Params) (label string, pattern *regexp.Regexp, err error) { +func parseGrep(p filter.Params) (label string, pattern *regexp.Regexp, err error) { v := strings.SplitN(p.String("grep", ""), ",", 2) if len(v) < 2 || v[0] == "" || v[1] == "" { return "", nil, nil @@ -149,7 +150,7 @@ func parseGrep(p Params) (label string, pattern *regexp.Regexp, err error) { return v[0], re, err } -func parseCut(p Params) []string { +func parseCut(p filter.Params) []string { s := p.String("cut", "") if s == "" { return []string{} @@ -158,5 +159,5 @@ func parseCut(p Params) []string { } func init() { - MustRegister("lstv", newLTSV) + filter.MustRegister("lstv", newLTSV) } diff --git a/filter/ltsv_test.go b/filter/ltsv/ltsv_test.go similarity index 66% rename from filter/ltsv_test.go rename to filter/ltsv/ltsv_test.go index 56c6d61..fd12c2d 100644 --- a/filter/ltsv_test.go +++ b/filter/ltsv/ltsv_test.go @@ -1,12 +1,17 @@ -package filter +package ltsv -import "testing" +import ( + "testing" + + "github.com/koron/nvgd/filter" + "github.com/koron/nvgd/internal/filtertest" +) func TestLTSVFilter(t *testing.T) { // TODO: fix LTSV filter. // internal/ltsvを使うように変更し、その他のテストも書く t.Skip("invalid just for now") - checkFilter(t, newLTSV, Params{}, + filtertest.Check(t, newLTSV, filter.Params{}, "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n") } From a5762e61bb6110efa7c91e186196369e2988ca70 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 01:35:26 +0900 Subject: [PATCH 09/12] improve ltsv filter --- filter/ltsv/ltsv.go | 83 +++++++----------------------- filter/ltsv/ltsv_test.go | 33 ++++++++++-- internal/filterbase/ltsv_reader.go | 10 ++-- internal/ltsv/reader.go | 6 +++ 4 files changed, 60 insertions(+), 72 deletions(-) diff --git a/filter/ltsv/ltsv.go b/filter/ltsv/ltsv.go index b557ac7..f77fe5f 100644 --- a/filter/ltsv/ltsv.go +++ b/filter/ltsv/ltsv.go @@ -8,54 +8,14 @@ import ( "github.com/koron/nvgd/filter" "github.com/koron/nvgd/internal/filterbase" + "github.com/koron/nvgd/internal/ltsv" "github.com/koron/nvgd/resource" ) -type ltsvValue map[string][]string - -func parseLTSV(s string) ltsvValue { - r := ltsvValue{} - for _, raw := range strings.Split(s, "\t") { - kv := strings.SplitN(raw, ":", 2) - if len(kv) != 2 { - continue - } - k, v := kv[0], kv[1] - slot := r[k] - r[k] = append(slot, v) - } - return r -} - -func (v ltsvValue) put(buf *bytes.Buffer) error { - first := true - for k, slot := range v { - for _, v := range slot { - if first { - first = false - } else { - if _, err := buf.WriteString("\t"); err != nil { - return err - } - } - if _, err := buf.WriteString(k); err != nil { - return err - } - if _, err := buf.WriteString(":"); err != nil { - return err - } - if _, err := buf.WriteString(v); err != nil { - return err - } - } - } - return nil -} - // LTSV represents a structure for LTSV (labeled tab separated value) type LTSV struct { filterbase.Base - reader *filterbase.LineReader + reader *filterbase.LTSVReader label string re *regexp.Regexp match bool @@ -66,7 +26,7 @@ type LTSV struct { func NewLTSV(r io.ReadCloser, label string, re *regexp.Regexp, match bool, cut []string) *LTSV { l := <SV{ label: label, - reader: filterbase.NewLineReader(r), + reader: filterbase.NewLTSVReader(r), re: re, match: match, cut: cut, @@ -77,35 +37,28 @@ func NewLTSV(r io.ReadCloser, label string, re *regexp.Regexp, match bool, cut [ func (l *LTSV) readNext(buf *bytes.Buffer) error { for { - b, err := l.reader.ReadLine() + row, err := l.reader.Read() if err != nil { return err } - if last := len(b) - 1; last > 0 && b[last] == '\n' { - b = b[0:last] - } - v := parseLTSV(string(b)) - if !l.isMatch(v) { + if l.isMatch(row) != l.match { continue } - if err := l.filter(v).put(buf); err != nil { - return err - } - if _, err := buf.WriteString("\n"); err != nil { + if err = ltsv.Write(buf, l.filter(row).Properties); err != nil { return err } } } -func (l *LTSV) isMatch(v ltsvValue) bool { +func (l *LTSV) isMatch(row *ltsv.Set) bool { if l.label == "" { return true } - slot, ok := v[l.label] - if !ok { + values := row.Get(l.label) + if len(values) == 0 { return false } - for _, v := range slot { + for _, v := range values { if l.re.MatchString(v) { return true } @@ -113,19 +66,21 @@ func (l *LTSV) isMatch(v ltsvValue) bool { return false } -func (l *LTSV) filter(v ltsvValue) ltsvValue { +func (l *LTSV) filter(row *ltsv.Set) *ltsv.Set { if len(l.cut) == 0 { - return v + return row } - r := ltsvValue{} + newRow := ltsv.NewSet() for _, label := range l.cut { - slot, ok := v[label] - if !ok { + values := row.Get(label) + if len(values) == 0 { continue } - r[label] = slot + for _, v := range values { + newRow.Put(label, v) + } } - return r + return newRow } func newLTSV(r *resource.Resource, p filter.Params) (*resource.Resource, error) { diff --git a/filter/ltsv/ltsv_test.go b/filter/ltsv/ltsv_test.go index fd12c2d..6c714e7 100644 --- a/filter/ltsv/ltsv_test.go +++ b/filter/ltsv/ltsv_test.go @@ -8,10 +8,35 @@ import ( ) func TestLTSVFilter(t *testing.T) { - // TODO: fix LTSV filter. - // internal/ltsvを使うように変更し、その他のテストも書く - t.Skip("invalid just for now") - filtertest.Check(t, newLTSV, filter.Params{}, + filtertest.Check(t, newLTSV, + filter.Params{}, "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n") + + filtertest.Check(t, newLTSV, + filter.Params{"grep": "b,555"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:444\tb:555\tc:666\n") + filtertest.Check(t, newLTSV, + filter.Params{"grep": "c,333"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:111\tb:222\tc:333\n") + filtertest.Check(t, newLTSV, + filter.Params{"grep": "a,777"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:777\tb:888\tc:999\n") + + filtertest.Check(t, newLTSV, + filter.Params{"match": "false", "grep": "b,555"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:111\tb:222\tc:333\na:777\tb:888\tc:999\n") + + filtertest.Check(t, newLTSV, + filter.Params{"cut": "a"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "a:111\na:444\na:777\n") + filtertest.Check(t, newLTSV, + filter.Params{"cut": "c,b"}, + "a:111\tb:222\tc:333\na:444\tb:555\tc:666\na:777\tb:888\tc:999\n", + "c:333\tb:222\nc:666\tb:555\nc:999\tb:888\n") } diff --git a/internal/filterbase/ltsv_reader.go b/internal/filterbase/ltsv_reader.go index 870400e..68a88f6 100644 --- a/internal/filterbase/ltsv_reader.go +++ b/internal/filterbase/ltsv_reader.go @@ -9,11 +9,13 @@ import ( ) type LTSVReader struct { - r ltsv.Reader + r *ltsv.Reader } -func NewLTSVReader(r io.Reader) *ltsv.Reader { - return ltsv.NewReaderSize(r, Config.MaxLineLen) +func NewLTSVReader(r io.Reader) *LTSVReader { + return <SVReader{ + r: ltsv.NewReaderSize(r, Config.MaxLineLen), + } } func (r *LTSVReader) Read() (*ltsv.Set, error) { @@ -21,5 +23,5 @@ func (r *LTSVReader) Read() (*ltsv.Set, error) { if errors.Is(err, bufio.ErrBufferFull) { return nil, ErrMaxLineExceeded } - return set, nil + return set, err } diff --git a/internal/ltsv/reader.go b/internal/ltsv/reader.go index a964bf9..c5c9560 100644 --- a/internal/ltsv/reader.go +++ b/internal/ltsv/reader.go @@ -59,6 +59,12 @@ type Set struct { Index map[string][]int } +func NewSet() *Set { + return &Set{ + Index: make(map[string][]int), + } +} + // Put puts a property to the set. func (s *Set) Put(label, value string) { n := len(s.Properties) From b0ed0ba26764b62e1f0c308bc50b0902704905d6 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 01:38:58 +0900 Subject: [PATCH 10/12] minimize the scope of the LineFilter --- filter/grep/grep.go | 10 +++++----- filter/{ => grep}/line_filter.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) rename filter/{ => grep}/line_filter.go (98%) diff --git a/filter/grep/grep.go b/filter/grep/grep.go index 0721ec2..d4c91cf 100644 --- a/filter/grep/grep.go +++ b/filter/grep/grep.go @@ -19,7 +19,7 @@ type Grep struct { re *regexp.Regexp match bool - lf filter.LineFilter + lf LineFilter lnum bool cnum int @@ -28,12 +28,12 @@ type Grep struct { } // NewGrep creates an instance of grep filter. -func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf filter.LineFilter, lnum bool, cnum int) *Grep { +func NewGrep(r io.ReadCloser, re *regexp.Regexp, match bool, lf LineFilter, lnum bool, cnum int) *Grep { g := &Grep{ reader: filterbase.NewLineReader(r), re: re, match: match, - lf: filter.TrimEOL.Chain(lf), + lf: TrimEOL.Chain(lf), lnum: lnum, cnum: cnum, } @@ -99,14 +99,14 @@ func newGrep(r *resource.Resource, p filter.Params) (*resource.Resource, error) match := p.Bool("match", true) lnum := p.Bool("number", false) cnum := p.Int("context", 0) - var lf filter.LineFilter + var lf LineFilter // field filter var ( field = p.Int("field", 0) delim = []byte(p.String("delim", "\t")) ) if field > 0 { - lf = lf.Chain(filter.NewCutLF(delim, field-1)) + lf = lf.Chain(NewCutLF(delim, field-1)) } return r.Wrap(NewGrep(r, re, match, lf, lnum, cnum)), nil } diff --git a/filter/line_filter.go b/filter/grep/line_filter.go similarity index 98% rename from filter/line_filter.go rename to filter/grep/line_filter.go index 2fc6d9c..3f48ab5 100644 --- a/filter/line_filter.go +++ b/filter/grep/line_filter.go @@ -1,4 +1,4 @@ -package filter +package grep import "bytes" From 15b91bfb7722574970b734401a6d0a856a91633e Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 01:45:17 +0900 Subject: [PATCH 11/12] separate files in plugins package --- plugins/filters.go | 19 +++++++++++++++++++ plugins/plugins.go | 24 ------------------------ plugins/protocols.go | 14 ++++++++++++++ 3 files changed, 33 insertions(+), 24 deletions(-) create mode 100644 plugins/filters.go create mode 100644 plugins/protocols.go diff --git a/plugins/filters.go b/plugins/filters.go new file mode 100644 index 0000000..6838b42 --- /dev/null +++ b/plugins/filters.go @@ -0,0 +1,19 @@ +package plugins + +import ( + _ "github.com/koron/nvgd/filter/count" + _ "github.com/koron/nvgd/filter/cut" + _ "github.com/koron/nvgd/filter/echarts" + _ "github.com/koron/nvgd/filter/grep" + _ "github.com/koron/nvgd/filter/hash" + _ "github.com/koron/nvgd/filter/head" + _ "github.com/koron/nvgd/filter/htmltable" + _ "github.com/koron/nvgd/filter/indexhtml" + _ "github.com/koron/nvgd/filter/jsonarray" + _ "github.com/koron/nvgd/filter/ltsv" + _ "github.com/koron/nvgd/filter/markdown" + _ "github.com/koron/nvgd/filter/pager" + _ "github.com/koron/nvgd/filter/tail" + _ "github.com/koron/nvgd/filter/texttable" + _ "github.com/koron/nvgd/filter/trdsql" +) diff --git a/plugins/plugins.go b/plugins/plugins.go index cd01f49..23048dd 100644 --- a/plugins/plugins.go +++ b/plugins/plugins.go @@ -1,26 +1,2 @@ // Package plugins load default protocols and filters for nvgd. package plugins - -import ( - _ "github.com/koron/nvgd/filter/count" - _ "github.com/koron/nvgd/filter/cut" - _ "github.com/koron/nvgd/filter/echarts" - _ "github.com/koron/nvgd/filter/htmltable" - _ "github.com/koron/nvgd/filter/indexhtml" - _ "github.com/koron/nvgd/filter/jsonarray" - _ "github.com/koron/nvgd/filter/markdown" - _ "github.com/koron/nvgd/filter/pager" - _ "github.com/koron/nvgd/filter/tail" - _ "github.com/koron/nvgd/filter/texttable" - _ "github.com/koron/nvgd/filter/trdsql" - _ "github.com/koron/nvgd/protocol/aws" - _ "github.com/koron/nvgd/protocol/configp" - _ "github.com/koron/nvgd/protocol/db" - _ "github.com/koron/nvgd/protocol/echarts" - _ "github.com/koron/nvgd/protocol/examples" - _ "github.com/koron/nvgd/protocol/file" - _ "github.com/koron/nvgd/protocol/help" - _ "github.com/koron/nvgd/protocol/redis" - _ "github.com/koron/nvgd/protocol/trdsql" - _ "github.com/koron/nvgd/protocol/version" -) diff --git a/plugins/protocols.go b/plugins/protocols.go new file mode 100644 index 0000000..1c8177e --- /dev/null +++ b/plugins/protocols.go @@ -0,0 +1,14 @@ +package plugins + +import ( + _ "github.com/koron/nvgd/protocol/aws" + _ "github.com/koron/nvgd/protocol/configp" + _ "github.com/koron/nvgd/protocol/db" + _ "github.com/koron/nvgd/protocol/echarts" + _ "github.com/koron/nvgd/protocol/examples" + _ "github.com/koron/nvgd/protocol/file" + _ "github.com/koron/nvgd/protocol/help" + _ "github.com/koron/nvgd/protocol/redis" + _ "github.com/koron/nvgd/protocol/trdsql" + _ "github.com/koron/nvgd/protocol/version" +) From 34fa69160a4ff5d3e8803027a6a05eb6074fb7c5 Mon Sep 17 00:00:00 2001 From: MURAOKA Taro Date: Fri, 6 Sep 2024 01:51:58 +0900 Subject: [PATCH 12/12] add head documents for packages --- filter/grep/grep.go | 1 + filter/hash/hash.go | 1 + filter/head/head.go | 1 + filter/ltsv/ltsv.go | 1 + internal/filterbase/base.go | 1 + 5 files changed, 5 insertions(+) diff --git a/filter/grep/grep.go b/filter/grep/grep.go index d4c91cf..0d4c490 100644 --- a/filter/grep/grep.go +++ b/filter/grep/grep.go @@ -1,3 +1,4 @@ +// Package grep provides "grep" filter for NVGD. package grep import ( diff --git a/filter/hash/hash.go b/filter/hash/hash.go index 684b81b..59161a2 100644 --- a/filter/hash/hash.go +++ b/filter/hash/hash.go @@ -1,3 +1,4 @@ +// Package hash provides a filter to calculate hashes like MD5, SHA1 or so. package hash import ( diff --git a/filter/head/head.go b/filter/head/head.go index b679f9d..72cfb58 100644 --- a/filter/head/head.go +++ b/filter/head/head.go @@ -1,3 +1,4 @@ +// Package head provides "head" filter for NVGD. package head import ( diff --git a/filter/ltsv/ltsv.go b/filter/ltsv/ltsv.go index f77fe5f..f6bc431 100644 --- a/filter/ltsv/ltsv.go +++ b/filter/ltsv/ltsv.go @@ -1,3 +1,4 @@ +// Package ltsv provides a filter modify (grep and cut) LTSV. package ltsv import ( diff --git a/internal/filterbase/base.go b/internal/filterbase/base.go index 5fe40d6..23507a3 100644 --- a/internal/filterbase/base.go +++ b/internal/filterbase/base.go @@ -1,3 +1,4 @@ +// Package filterbase provides utility tools to implement filters. package filterbase import (