From 8b5de2e8c2c563810a459cabf2e27d4da836dd42 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 7 Feb 2021 16:15:47 +0000 Subject: [PATCH 01/22] Moved CSV logic into base package. --- modules/base/csv.go | 77 +++++++++++++++++++++++++++++++++++++++ modules/base/csv_test.go | 40 ++++++++++++++++++++ modules/markup/csv/csv.go | 61 ++----------------------------- 3 files changed, 120 insertions(+), 58 deletions(-) create mode 100644 modules/base/csv.go create mode 100644 modules/base/csv_test.go diff --git a/modules/base/csv.go b/modules/base/csv.go new file mode 100644 index 0000000000000..53fcf42a6e4ff --- /dev/null +++ b/modules/base/csv.go @@ -0,0 +1,77 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package base + +import ( + "bytes" + "encoding/csv" + "regexp" + "strings" + + "code.gitea.io/gitea/modules/util" +) + +var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) + +// CreateCsvReader creates a CSV reader with the given delimiter. +func CreateCsvReader(rawBytes []byte, delimiter rune) *csv.Reader { + rd := csv.NewReader(bytes.NewReader(rawBytes)) + rd.Comma = delimiter + return rd +} + +// CreateCsvReaderAndGuessDelimiter creates a CSV reader with a guessed delimiter. +func CreateCsvReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader { + delimiter := guessDelimiter(rawBytes) + return CreateCsvReader(rawBytes, delimiter) +} + +// guessDelimiter scores the input CSV data against delimiters, and returns the best match. +// Reads at most 10k bytes & 10 lines. +func guessDelimiter(data []byte) rune { + maxLines := 10 + maxBytes := util.Min(len(data), 1e4) + text := string(data[:maxBytes]) + text = quoteRegexp.ReplaceAllLiteralString(text, "") + lines := strings.SplitN(text, "\n", maxLines+1) + lines = lines[:util.Min(maxLines, len(lines))] + + delimiters := []rune{',', ';', '\t', '|'} + bestDelim := delimiters[0] + bestScore := 0.0 + for _, delim := range delimiters { + score := scoreDelimiter(lines, delim) + if score > bestScore { + bestScore = score + bestDelim = delim + } + } + + return bestDelim +} + +// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV +func scoreDelimiter(lines []string, delim rune) float64 { + countTotal := 0 + countLineMax := 0 + linesNotEqual := 0 + + for _, line := range lines { + if len(line) == 0 { + continue + } + + countLine := strings.Count(line, string(delim)) + countTotal += countLine + if countLine != countLineMax { + if countLineMax != 0 { + linesNotEqual++ + } + countLineMax = util.Max(countLine, countLineMax) + } + } + + return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) +} diff --git a/modules/base/csv_test.go b/modules/base/csv_test.go new file mode 100644 index 0000000000000..bd9952e2c1153 --- /dev/null +++ b/modules/base/csv_test.go @@ -0,0 +1,40 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package base + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCreateCsvReader(t *testing.T) { + rd := CreateCsvReader([]byte{}, ',') + assert.Equal(t, ',', rd.Comma) +} + +func TestCreateCsvReaderAndGuessDelimiter(t *testing.T) { + input := "a;b;c\n1;2;3\n4;5;6" + + rd := CreateCsvReaderAndGuessDelimiter([]byte(input)) + assert.Equal(t, ';', rd.Comma) +} + +func TestGuessDelimiter(t *testint.T) { + var kases = map[string]rune{ + "a": ',', + "1,2": ',', + "1;2": ';', + "1\t2": '\t', + "1|2": '|', + "1,2,3;4,5,6;7,8,9\na;b;c": ';', + "\"1,2,3,4\";\"a\nb\"\nc;d": ';', + "
": ',', + } + + for k, v := range kases { + assert.EqualValues(t, guessDelimiter([]byte(k)), v) + } +} \ No newline at end of file diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1e3acc9b47e2f..64a9438c9cc3c 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -6,21 +6,15 @@ package markup import ( "bytes" - "encoding/csv" "html" "io" - "regexp" - "strings" + "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/markup" - "code.gitea.io/gitea/modules/util" ) -var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) - func init() { markup.RegisterParser(Parser{}) - } // Parser implements markup.Parser for orgmode @@ -38,9 +32,8 @@ func (Parser) Extensions() []string { } // Render implements markup.Parser -func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - rd := csv.NewReader(bytes.NewReader(rawBytes)) - rd.Comma = p.bestDelimiter(rawBytes) +func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { + rd := base.CreateCsvReaderAndGuessDelimiter(rawBytes) var tmpBlock bytes.Buffer tmpBlock.WriteString(``) for { @@ -63,51 +56,3 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin return tmpBlock.Bytes() } - -// bestDelimiter scores the input CSV data against delimiters, and returns the best match. -// Reads at most 10k bytes & 10 lines. -func (p Parser) bestDelimiter(data []byte) rune { - maxLines := 10 - maxBytes := util.Min(len(data), 1e4) - text := string(data[:maxBytes]) - text = quoteRegexp.ReplaceAllLiteralString(text, "") - lines := strings.SplitN(text, "\n", maxLines+1) - lines = lines[:util.Min(maxLines, len(lines))] - - delimiters := []rune{',', ';', '\t', '|'} - bestDelim := delimiters[0] - bestScore := 0.0 - for _, delim := range delimiters { - score := p.scoreDelimiter(lines, delim) - if score > bestScore { - bestScore = score - bestDelim = delim - } - } - - return bestDelim -} - -// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV -func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { - countTotal := 0 - countLineMax := 0 - linesNotEqual := 0 - - for _, line := range lines { - if len(line) == 0 { - continue - } - - countLine := strings.Count(line, string(delim)) - countTotal += countLine - if countLine != countLineMax { - if countLineMax != 0 { - linesNotEqual++ - } - countLineMax = util.Max(countLine, countLineMax) - } - } - - return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) -} From d8d9211d081929bb925e6b052b009a4288224388 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 7 Feb 2021 17:46:58 +0000 Subject: [PATCH 02/22] Added method to create a tabular diff. --- services/gitdiff/csv.go | 303 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 303 insertions(+) create mode 100644 services/gitdiff/csv.go diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go new file mode 100644 index 0000000000000..74cdabd19870f --- /dev/null +++ b/services/gitdiff/csv.go @@ -0,0 +1,303 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + + "code.gitea.io/gitea/modules/util" +) + +const unmappedColumn = -1 + +// TableDiffCellType represents the type of a TableDiffCell. +type TableDiffCellType uint8 + +// TableDiffCellType possible values. +const ( + TableDiffCellEqual TableDiffCellType = iota + 1 + TableDiffCellChanged + TableDiffCellAdd + TableDiffCellDel +) + +// TableDiffCell represents a cell of a TableDiffRow +type TableDiffCell struct { + LeftCell *string + RightCell *string + Type TableDiffCellType +} + +// TableDiffRow represents a row of a TableDiffSection. +type TableDiffRow struct { + RowIdx int + Cells []*TableDiffCell +} + +// TableDiffSection represents a section of a DiffFile. +type TableDiffSection struct { + Rows []*TableDiffRow +} + +// CreateCsvDiff creates a tabular diff based on two CSV readers. +func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + if baseReader != nil && headReader != nil { + return createCsvDiff(diffFile, baseReader, headReader) + } + + if baseReader != nil { + return createCsvDiffSingle(baseReader, TableDiffCellDel) + } + return createCsvDiffSingle(headReader, TableDiffCellAdd) +} + +// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. +func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { + a, err := reader.ReadAll() + if err != nil { + return nil, err + } + + rows := make([]*TableDiffRow, len(a)) + for i, row := range a { + cells := make([]*TableDiffCell, len(row)) + for j := 0; j < len(row); j++ { + cells[j] = &TableDiffCell{ LeftCell: &row[j], Type: celltype } + } + rows[i] = &TableDiffRow{ RowIdx: i + 1, Cells: cells } + } + + return []*TableDiffSection{&TableDiffSection{ Rows: rows}}, nil +} + +func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + arows, err := baseReader.ReadAll() + if err != nil { + return nil, err + } + a := arows[:] + + brows, err := headReader.ReadAll() + if err != nil { + return nil, err + } + b := brows[:] + + a2b, b2a := getColumnMapping(a, b) + + columns := len(a2b) + countUnmappedColumns(b2a) + if len(a2b) < len(b2a) { + columns = len(b2a) + countUnmappedColumns(a2b) + } + + createDiffRow := func(aline int, bline int) *TableDiffRow { + cells := make([]*TableDiffCell, columns) + + if aline == 0 || bline == 0 { + var ( + row []string + celltype TableDiffCellType + ) + if bline == 0 { + row = getRow(a, aline - 1) + celltype = TableDiffCellDel + } else { + row = getRow(b, bline - 1) + celltype = TableDiffCellAdd + } + if row == nil { + return nil + } + for i := 0; i < len(row); i++ { + cells[i] = &TableDiffCell{ LeftCell: &row[i], Type: celltype } + } + return &TableDiffRow{ RowIdx: bline, Cells: cells } + } + + arow := getRow(a, aline - 1) + brow := getRow(b, bline - 1) + for i := 0; i < len(a2b); i++ { + if a2b[i] == unmappedColumn { + cells[i] = &TableDiffCell{ LeftCell: getCell(arow, i), Type: TableDiffCellDel } + } else { + acell := getCell(arow, i) + bcell := getCell(brow, a2b[i]) + + celltype := TableDiffCellChanged + if acell != nil && bcell != nil && *acell == *bcell { + celltype = TableDiffCellEqual + } + + cells[i] = &TableDiffCell{ LeftCell: acell, RightCell: bcell, Type: celltype } + } + } + for i := 0; i < len(b2a); i++ { + if b2a[i] == unmappedColumn { + cells[i] = &TableDiffCell{ RightCell: getCell(brow, i), Type: TableDiffCellAdd } + } + } + + return &TableDiffRow{ RowIdx: bline, Cells: cells } + } + + var sections []*TableDiffSection + + for i, section := range diffFile.Sections { + var rows []*TableDiffRow + lines := tryMergeLines(section.Lines) + for j, line := range lines { + if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { + diffRow := createDiffRow(1, 1) + if diffRow != nil { + rows = append(rows, diffRow) + } + } + diffRow := createDiffRow(line[0], line[1]) + if diffRow != nil { + rows = append(rows, diffRow) + } + } + + if len(rows) > 0 { + sections = append(sections, &TableDiffSection{ Rows: rows}) + } + } + + return sections, nil +} + +// getColumnMapping creates a mapping of columns between a and b +func getColumnMapping(a [][]string, b [][]string) ([]int, []int) { + arow := getRow(a, 0) + brow := getRow(b, 0) + + a2b := []int{}; + b2a := []int{}; + + if arow != nil { + a2b = make([]int, len(a[0])) + } + if brow != nil { + b2a = make([]int, len(b[0])) + } + + for i := 0; i < len(b2a); i++ { + b2a[i] = unmappedColumn + } + + bcol := 0 + for i := 0; i < len(a2b); i++ { + a2b[i] = unmappedColumn + + acell := getCell(arow, i) + if acell != nil { + for j := bcol; j < len(b2a); j++ { + bcell := getCell(brow, j) + if bcell != nil && *acell == *bcell { + a2b[i] = j + b2a[j] = i + bcol = j + 1 + break + } + } + } + } + + tryMapColumnsByContent(a, a2b, b, b2a) + tryMapColumnsByContent(b, b2a, a, a2b) + + return a2b, b2a +} + +// tryMapColumnsByContent tries to map missing columns by the content of the first lines. +func tryMapColumnsByContent(a [][]string, a2b []int, b [][]string, b2a []int) { + const MaxRows int = 10 + const MinRatio float32 = 0.8 + + start := 0 + for i := 0; i < len(a2b); i++ { + if a2b[i] == unmappedColumn { + if b2a[start] == unmappedColumn { + rows := util.Min(MaxRows, util.Max(0, util.Min(len(a), len(b)) - 1)) + same := 0 + for j := 1; j <= rows; j++ { + acell := getCell(getRow(a, j), i) + bcell := getCell(getRow(b, j), start + 1) + if acell != nil && bcell != nil && *acell == *bcell { + same++ + } + } + if (float32(same) / float32(rows)) > MinRatio { + a2b[i] = start + 1 + b2a[start + 1] = i + } + } + } + start = a2b[i] + } +} + +// getRow returns the specific row or nil if not present. +func getRow(records [][]string, row int) []string { + if row < len(records) { + return records[row] + } + return nil +} + +// getCell returns the specific cell or nil if not present. +func getCell(row []string, column int) *string { + if column < len(row) { + return &row[column] + } + return nil +} + +// countUnmappedColumns returns the count of unmapped columns. +func countUnmappedColumns(mapping []int) int { + count := 0 + for i := 0; i < len(mapping); i++ { + if mapping[i] == unmappedColumn { + count++ + } + } + return count +} + +// tryMergeLines maps the seperated line numbers of a git diff. +func tryMergeLines(lines []*DiffLine) [][2]int { + ids := make([][2]int, len(lines)) + + i := 0 + for _, line := range lines { + if line.Type != DiffLineSection { + ids[i][0] = line.LeftIdx + ids[i][1] = line.RightIdx + i++ + } + } + + ids = ids[:i] + + result := make([][2]int, len(ids)) + + j := 0 + for i = 0; i < len(ids); i++ { + if ids[i][0] == 0 { + if j > 0 && result[j - 1][1] == 0 { + temp := j + for temp > 0 && result[temp - 1][1] == 0 { + temp-- + } + result[temp][1] = ids[i][1] + continue + } + } + result[j] = ids[i] + j++ + } + + return result[:j] +} \ No newline at end of file From 6b06e9a38285a58f98ad8f2c69cfb28b008b0980 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 7 Feb 2021 18:15:25 +0000 Subject: [PATCH 03/22] Added CSV compare context. --- routers/repo/commit.go | 3 +-- routers/repo/compare.go | 59 +++++++++++++++++++++++++++++++++++++++-- routers/repo/pull.go | 6 ++--- 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/routers/repo/commit.go b/routers/repo/commit.go index c411d247e2e59..78c002c6d25b3 100644 --- a/routers/repo/commit.go +++ b/routers/repo/commit.go @@ -335,9 +335,8 @@ func Diff(ctx *context.Context) { return } } - setImageCompareContext(ctx, parentCommit, commit) headTarget := path.Join(userName, repoName) - setPathsCompareContext(ctx, parentCommit, commit, headTarget) + setCompareContext(ctx, parentCommit, commit, headTarget) ctx.Data["Title"] = commit.Summary() + " ยท " + base.ShortSha(commitID) ctx.Data["Commit"] = commit verification := models.ParseCommitWithSignature(commit) diff --git a/routers/repo/compare.go b/routers/repo/compare.go index 218f712469530..d93ec5dab3a81 100644 --- a/routers/repo/compare.go +++ b/routers/repo/compare.go @@ -6,9 +6,12 @@ package repo import ( "bufio" + "encoding/csv" "fmt" "html" + "io/ioutil" "path" + "path/filepath" "strings" "code.gitea.io/gitea/models" @@ -26,6 +29,16 @@ const ( tplBlobExcerpt base.TplName = "repo/diff/blob_excerpt" ) +// setCompareContext sets context data. +func setCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) { + ctx.Data["BaseCommit"] = base + ctx.Data["HeadCommit"] = head + + setPathsCompareContext(ctx, base, head, headTarget) + setImageCompareContext(ctx, base, head) + setCsvCompareContext(ctx) +} + // setPathsCompareContext sets context data for source and raw paths func setPathsCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) { sourcePath := setting.AppSubURL + "/%s/src/commit/%s" @@ -65,6 +78,49 @@ func setImageCompareContext(ctx *context.Context, base *git.Commit, head *git.Co } } +// setCsvCompareContext sets context data that is required by the CSV compare template +func setCsvCompareContext(ctx *context.Context) { + ctx.Data["IsCsvFile"] = func(diffFile *gitdiff.DiffFile) bool { + extension := strings.ToLower(filepath.Ext(diffFile.Name)) + return extension == ".csv" || extension == ".tsv" + } + ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) []*gitdiff.TableDiffSection { + if diffFile == nil || baseCommit == nil || headCommit == nil { + return nil + } + + csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) { + blob, err := c.GetBlobByPath(diffFile.Name) + if err != nil { + return nil, err + } + + reader, err := blob.DataAsync() + if err != nil { + return nil, err + } + defer reader.Close() + + b, err := ioutil.ReadAll(reader) + if err != nil { + return nil, err + } + + return base.CreateCsvReaderAndGuessDelimiter(b), nil + } + + baseReader, _ := csvReaderFromCommit(baseCommit) + headReader, _ := csvReaderFromCommit(headCommit) + + sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader) + if err != nil { + log.Error("RenderCsvDiff failed: %v", err) + return nil + } + return sections + } +} + // ParseCompareInfo parse compare info between two commit for preparing comparing references func ParseCompareInfo(ctx *context.Context) (*models.User, *models.Repository, *git.Repository, *git.CompareInfo, string, string) { baseRepo := ctx.Repo.Repository @@ -499,9 +555,8 @@ func PrepareCompareDiff( ctx.Data["Username"] = headUser.Name ctx.Data["Reponame"] = headRepo.Name - setImageCompareContext(ctx, baseCommit, headCommit) headTarget := path.Join(headUser.Name, repo.Name) - setPathsCompareContext(ctx, baseCommit, headCommit, headTarget) + setCompareContext(ctx, baseCommit, headCommit, headTarget) return false } diff --git a/routers/repo/pull.go b/routers/repo/pull.go index 69860021025b9..0badca9b1e386 100644 --- a/routers/repo/pull.go +++ b/routers/repo/pull.go @@ -594,7 +594,6 @@ func ViewPullFiles(ctx *context.Context) { gitRepo *git.Repository ) - var headTarget string var prInfo *git.CompareInfo if pull.HasMerged { prInfo = PrepareMergedViewPullInfo(ctx, issue) @@ -621,7 +620,6 @@ func ViewPullFiles(ctx *context.Context) { startCommitID = prInfo.MergeBase endCommitID = headCommitID - headTarget = path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name) ctx.Data["Username"] = ctx.Repo.Owner.Name ctx.Data["Reponame"] = ctx.Repo.Repository.Name ctx.Data["AfterCommitID"] = endCommitID @@ -675,8 +673,8 @@ func ViewPullFiles(ctx *context.Context) { } } - setImageCompareContext(ctx, baseCommit, commit) - setPathsCompareContext(ctx, baseCommit, commit, headTarget) + headTarget := path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name) + setCompareContext(ctx, baseCommit, commit, headTarget) ctx.Data["RequireHighlightJS"] = true ctx.Data["RequireSimpleMDE"] = true From eeb10bc27dfe34d1e385711d0fdf5fd5ef4d0ec7 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 7 Feb 2021 20:10:09 +0000 Subject: [PATCH 04/22] Prevent parser error for ' 1; "2"; 3' --- modules/base/csv.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/base/csv.go b/modules/base/csv.go index 53fcf42a6e4ff..c33ead67a6e84 100644 --- a/modules/base/csv.go +++ b/modules/base/csv.go @@ -19,6 +19,7 @@ var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) func CreateCsvReader(rawBytes []byte, delimiter rune) *csv.Reader { rd := csv.NewReader(bytes.NewReader(rawBytes)) rd.Comma = delimiter + rd.TrimLeadingSpace = true return rd } From fdab153aa2492290e85f3a15cd43ada1423191f4 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 8 Feb 2021 16:21:06 +0000 Subject: [PATCH 05/22] Prevent nil-cells. --- services/gitdiff/csv.go | 48 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go index 74cdabd19870f..7ce338df200ca 100644 --- a/services/gitdiff/csv.go +++ b/services/gitdiff/csv.go @@ -6,6 +6,7 @@ package gitdiff import ( "encoding/csv" + "errors" "code.gitea.io/gitea/modules/util" ) @@ -25,8 +26,8 @@ const ( // TableDiffCell represents a cell of a TableDiffRow type TableDiffCell struct { - LeftCell *string - RightCell *string + LeftCell string + RightCell string Type TableDiffCellType } @@ -64,7 +65,7 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab for i, row := range a { cells := make([]*TableDiffCell, len(row)) for j := 0; j < len(row); j++ { - cells[j] = &TableDiffCell{ LeftCell: &row[j], Type: celltype } + cells[j] = &TableDiffCell{ LeftCell: row[j], Type: celltype } } rows[i] = &TableDiffRow{ RowIdx: i + 1, Cells: cells } } @@ -111,22 +112,26 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R return nil } for i := 0; i < len(row); i++ { - cells[i] = &TableDiffCell{ LeftCell: &row[i], Type: celltype } + cells[i] = &TableDiffCell{ LeftCell: row[i], Type: celltype } } return &TableDiffRow{ RowIdx: bline, Cells: cells } } arow := getRow(a, aline - 1) brow := getRow(b, bline - 1) + if len(arow) == 0 && len(brow) == 0 { + return nil + } + for i := 0; i < len(a2b); i++ { + acell, _ := getCell(arow, i) if a2b[i] == unmappedColumn { - cells[i] = &TableDiffCell{ LeftCell: getCell(arow, i), Type: TableDiffCellDel } + cells[i] = &TableDiffCell{ LeftCell: acell, Type: TableDiffCellDel } } else { - acell := getCell(arow, i) - bcell := getCell(brow, a2b[i]) + bcell, _ := getCell(brow, a2b[i]) celltype := TableDiffCellChanged - if acell != nil && bcell != nil && *acell == *bcell { + if acell == bcell { celltype = TableDiffCellEqual } @@ -135,7 +140,8 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R } for i := 0; i < len(b2a); i++ { if b2a[i] == unmappedColumn { - cells[i] = &TableDiffCell{ RightCell: getCell(brow, i), Type: TableDiffCellAdd } + bcell, _ := getCell(brow, i) + cells[i] = &TableDiffCell{ RightCell: bcell, Type: TableDiffCellAdd } } } @@ -173,8 +179,8 @@ func getColumnMapping(a [][]string, b [][]string) ([]int, []int) { arow := getRow(a, 0) brow := getRow(b, 0) - a2b := []int{}; - b2a := []int{}; + a2b := []int{} + b2a := []int{} if arow != nil { a2b = make([]int, len(a[0])) @@ -191,11 +197,11 @@ func getColumnMapping(a [][]string, b [][]string) ([]int, []int) { for i := 0; i < len(a2b); i++ { a2b[i] = unmappedColumn - acell := getCell(arow, i) - if acell != nil { + acell, ea := getCell(arow, i) + if ea == nil { for j := bcol; j < len(b2a); j++ { - bcell := getCell(brow, j) - if bcell != nil && *acell == *bcell { + bcell, eb := getCell(brow, j) + if eb == nil && acell == bcell { a2b[i] = j b2a[j] = i bcol = j + 1 @@ -223,9 +229,9 @@ func tryMapColumnsByContent(a [][]string, a2b []int, b [][]string, b2a []int) { rows := util.Min(MaxRows, util.Max(0, util.Min(len(a), len(b)) - 1)) same := 0 for j := 1; j <= rows; j++ { - acell := getCell(getRow(a, j), i) - bcell := getCell(getRow(b, j), start + 1) - if acell != nil && bcell != nil && *acell == *bcell { + acell, ea := getCell(getRow(a, j), i) + bcell, eb := getCell(getRow(b, j), start + 1) + if ea == nil && eb == nil && acell == bcell { same++ } } @@ -248,11 +254,11 @@ func getRow(records [][]string, row int) []string { } // getCell returns the specific cell or nil if not present. -func getCell(row []string, column int) *string { +func getCell(row []string, column int) (string, error) { if column < len(row) { - return &row[column] + return row[column], nil } - return nil + return "", errors.New("Undefined column") } // countUnmappedColumns returns the count of unmapped columns. From 9fd31a22da554160338b8f9aba3a7f8871764494 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Thu, 11 Feb 2021 15:28:02 +0000 Subject: [PATCH 06/22] Lint --- services/gitdiff/csv.go | 48 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go index 7ce338df200ca..7ae82cfdda65b 100644 --- a/services/gitdiff/csv.go +++ b/services/gitdiff/csv.go @@ -65,12 +65,12 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab for i, row := range a { cells := make([]*TableDiffCell, len(row)) for j := 0; j < len(row); j++ { - cells[j] = &TableDiffCell{ LeftCell: row[j], Type: celltype } + cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} } - rows[i] = &TableDiffRow{ RowIdx: i + 1, Cells: cells } + rows[i] = &TableDiffRow{RowIdx: i + 1, Cells: cells} } - return []*TableDiffSection{&TableDiffSection{ Rows: rows}}, nil + return []*TableDiffSection{{Rows: rows}}, nil } func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { @@ -95,30 +95,30 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R createDiffRow := func(aline int, bline int) *TableDiffRow { cells := make([]*TableDiffCell, columns) - + if aline == 0 || bline == 0 { var ( - row []string + row []string celltype TableDiffCellType ) if bline == 0 { - row = getRow(a, aline - 1) + row = getRow(a, aline-1) celltype = TableDiffCellDel } else { - row = getRow(b, bline - 1) + row = getRow(b, bline-1) celltype = TableDiffCellAdd } if row == nil { return nil } for i := 0; i < len(row); i++ { - cells[i] = &TableDiffCell{ LeftCell: row[i], Type: celltype } + cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype} } - return &TableDiffRow{ RowIdx: bline, Cells: cells } + return &TableDiffRow{RowIdx: bline, Cells: cells} } - arow := getRow(a, aline - 1) - brow := getRow(b, bline - 1) + arow := getRow(a, aline-1) + brow := getRow(b, bline-1) if len(arow) == 0 && len(brow) == 0 { return nil } @@ -126,26 +126,26 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R for i := 0; i < len(a2b); i++ { acell, _ := getCell(arow, i) if a2b[i] == unmappedColumn { - cells[i] = &TableDiffCell{ LeftCell: acell, Type: TableDiffCellDel } + cells[i] = &TableDiffCell{LeftCell: acell, Type: TableDiffCellDel} } else { bcell, _ := getCell(brow, a2b[i]) - + celltype := TableDiffCellChanged if acell == bcell { celltype = TableDiffCellEqual } - cells[i] = &TableDiffCell{ LeftCell: acell, RightCell: bcell, Type: celltype } + cells[i] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype} } } for i := 0; i < len(b2a); i++ { if b2a[i] == unmappedColumn { bcell, _ := getCell(brow, i) - cells[i] = &TableDiffCell{ RightCell: bcell, Type: TableDiffCellAdd } + cells[i] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd} } } - - return &TableDiffRow{ RowIdx: bline, Cells: cells } + + return &TableDiffRow{RowIdx: bline, Cells: cells} } var sections []*TableDiffSection @@ -167,7 +167,7 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R } if len(rows) > 0 { - sections = append(sections, &TableDiffSection{ Rows: rows}) + sections = append(sections, &TableDiffSection{Rows: rows}) } } @@ -226,18 +226,18 @@ func tryMapColumnsByContent(a [][]string, a2b []int, b [][]string, b2a []int) { for i := 0; i < len(a2b); i++ { if a2b[i] == unmappedColumn { if b2a[start] == unmappedColumn { - rows := util.Min(MaxRows, util.Max(0, util.Min(len(a), len(b)) - 1)) + rows := util.Min(MaxRows, util.Max(0, util.Min(len(a), len(b))-1)) same := 0 for j := 1; j <= rows; j++ { acell, ea := getCell(getRow(a, j), i) - bcell, eb := getCell(getRow(b, j), start + 1) + bcell, eb := getCell(getRow(b, j), start+1) if ea == nil && eb == nil && acell == bcell { same++ } } if (float32(same) / float32(rows)) > MinRatio { a2b[i] = start + 1 - b2a[start + 1] = i + b2a[start+1] = i } } } @@ -292,9 +292,9 @@ func tryMergeLines(lines []*DiffLine) [][2]int { j := 0 for i = 0; i < len(ids); i++ { if ids[i][0] == 0 { - if j > 0 && result[j - 1][1] == 0 { + if j > 0 && result[j-1][1] == 0 { temp := j - for temp > 0 && result[temp - 1][1] == 0 { + for temp > 0 && result[temp-1][1] == 0 { temp-- } result[temp][1] = ids[i][1] @@ -306,4 +306,4 @@ func tryMergeLines(lines []*DiffLine) [][2]int { } return result[:j] -} \ No newline at end of file +} From 16db3bae673420f240d7520e436cb527d64732cd Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Fri, 12 Feb 2021 14:48:34 +0000 Subject: [PATCH 07/22] Added csv diff template. --- options/locale/locale_en-US.ini | 1 + templates/repo/diff/box.tmpl | 43 +++++++++++++++------- templates/repo/diff/csv_diff.tmpl | 44 +++++++++++++++++++++++ web_src/js/index.js | 13 +++++++ web_src/less/_repository.less | 60 +++++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 13 deletions(-) create mode 100644 templates/repo/diff/csv_diff.tmpl diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index a4b677e43baf7..a996b561726f6 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1813,6 +1813,7 @@ diff.whitespace_ignore_at_eol = Ignore changes in whitespace at EOL diff.stats_desc = %d changed files with %d additions and %d deletions diff.stats_desc_file = %d changes: %d additions and %d deletions diff.bin = BIN +diff.bin_not_shown = Binary file not shown. diff.view_file = View File diff.file_before = Before diff.file_after = After diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index f01795ad15b60..faf19bd33aaf6 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -87,6 +87,8 @@ {{else}} {{$isImage = (call $.IsImageFileInHead $file.Name)}} {{end}} + {{$isCsv := (call $.IsCsvFile $file)}} + {{$showFileViewToggle := or $isImage $isCsv}} {{svg "octicon-chevron-down" 18}} @@ -100,6 +102,12 @@ {{if $file.IsRenamed}}{{$file.OldName}} → {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}
+ {{if $showFileViewToggle}} +
+ {{svg "octicon-code"}} + {{svg "octicon-file"}} +
+ {{end}} {{if $file.IsProtected}} {{$.i18n.Tr "repo.diff.protected"}} {{end}} @@ -114,21 +122,30 @@
{{if ne $file.Type 4}} -
-
- - {{if $isImage}} - {{template "repo/diff/image_diff" dict "file" . "root" $}} - {{else}} - {{if $.IsSplitStyle}} - {{template "repo/diff/section_split" dict "file" . "root" $}} - {{else}} - {{template "repo/diff/section_unified" dict "file" . "root" $}} - {{end}} - {{end}} - +
+ {{if $file.IsBin}} +
{{$.i18n.Tr "repo.diff.bin_not_shown"}}
+ {{else}} +
+ {{if $.IsSplitStyle}} + {{template "repo/diff/section_split" dict "file" . "root" $}} + {{else}} + {{template "repo/diff/section_unified" dict "file" . "root" $}} + {{end}}
+ {{end}} + {{if or $isImage $isCsv}} +
+ + {{if $isImage}} + {{template "repo/diff/image_diff" dict "file" . "root" $}} + {{else}} + {{template "repo/diff/csv_diff" dict "file" . "root" $}} + {{end}} +
+
+ {{end}} {{end}} diff --git a/templates/repo/diff/csv_diff.tmpl b/templates/repo/diff/csv_diff.tmpl new file mode 100644 index 0000000000000..fd2aeaed124c3 --- /dev/null +++ b/templates/repo/diff/csv_diff.tmpl @@ -0,0 +1,44 @@ + + + + {{$sections := call .root.CreateCsvDiff .file .root.BaseCommit .root.HeadCommit}} + {{if $sections}} + {{range $i, $section := $sections}} + + {{range $j, $row := $section.Rows}} + + {{if and (eq $i 0) (eq $j 0)}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + + {{end}} + {{end}} + {{else}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + + {{end}} + {{end}} + {{end}} + + {{end}} + + {{end}} + {{end}} +
{{.RowIdx}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}{{if .RowIdx}}{{.RowIdx}}{{end}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}
+ + \ No newline at end of file diff --git a/web_src/js/index.js b/web_src/js/index.js index f5f4841410b41..0d852f7d83be5 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -2390,6 +2390,18 @@ function initIssueReferenceRepositorySearch() { }); } +function initFileViewToggle() { + $('.file-view-toggle').on('click', function() { + const $this = $(this); + $this.parent().children().removeClass('active'); + $this.addClass('active'); + + const $target = $($this.data('toggle-selector')); + $target.parent().children().addClass('hide'); + $target.removeClass('hide'); + }); +} + $(document).ready(async () => { // Show exact time $('.time-since').each(function () { @@ -2611,6 +2623,7 @@ $(document).ready(async () => { initTableSort(); initNotificationsTable(); initPullRequestMergeInstruction(); + initFileViewToggle(); const routes = { 'div.user.settings': initUserSettings, diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index 57f101abb9186..d1cf17ca7f3f4 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -1443,6 +1443,66 @@ } } + .data-table { + width: 100%; + + tr { + border-top: 0; + } + + td, + th { + padding: 5px !important; + overflow: hidden; + font-size: 12px; + line-height: 1; + text-align: left; + white-space: nowrap; + border: 1px solid var(--color-secondary); + } + + td { + white-space: pre-line; + } + + th { + font-weight: 600; + background: var(--color-box-header); + border-top: 0; + } + + td.added, + th.added, + tr.added { + background-color: var(--color-diff-added-row-bg) !important; + } + + td.removed, + th.removed, + tr.removed { + background-color: var(--color-diff-removed-row-bg) !important; + } + + tbody.section { + border-top: 2px solid var(--color-secondary); + } + + .line-num { + width: 1%; + min-width: 50px; + font-family: monospace; + line-height: 20px; + color: var(--color-secondary-dark-2); + white-space: nowrap; + vertical-align: top; + cursor: pointer; + user-select: none; + text-align: right; + background: var(--color-white); + border: 0; + } + } + .diff-detail-box { padding: 7px 0; background: var(--color-body); From d13a2cd927c73340bac956fc92ac51a60d23dedc Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Fri, 12 Feb 2021 14:51:13 +0000 Subject: [PATCH 08/22] Use new table style in csv markup. --- modules/markup/csv/csv.go | 31 +++++++++++++++++++++++++++---- modules/markup/sanitizer.go | 4 ++++ templates/repo/view_file.tmpl | 2 +- web_src/less/_markdown.less | 25 ------------------------- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 64a9438c9cc3c..1b98638011544 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -8,6 +8,7 @@ import ( "bytes" "html" "io" + "strconv" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/markup" @@ -35,7 +36,24 @@ func (Parser) Extensions() []string { func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { rd := base.CreateCsvReaderAndGuessDelimiter(rawBytes) var tmpBlock bytes.Buffer - tmpBlock.WriteString(``) + + writeField := func(element, class, field string) { + tmpBlock.WriteString("<") + tmpBlock.WriteString(element) + if len(class) > 0 { + tmpBlock.WriteString(" class=\"") + tmpBlock.WriteString(class) + tmpBlock.WriteString("\"") + } + tmpBlock.WriteString(">") + tmpBlock.WriteString(html.EscapeString(field)) + tmpBlock.WriteString("") + } + + tmpBlock.WriteString(`
`) + row := 1 for { fields, err := rd.Read() if err == io.EOF { @@ -45,12 +63,17 @@ func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, continue } tmpBlock.WriteString("") + element := "td" + if row == 1 { + element = "th" + } + writeField(element, "line-num", strconv.Itoa(row)) for _, field := range fields { - tmpBlock.WriteString("") + writeField(element, "", field) } tmpBlock.WriteString("") + + row++ } tmpBlock.WriteString("
") - tmpBlock.WriteString(html.EscapeString(field)) - tmpBlock.WriteString("
") diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 9214a75fb355b..19feaa3cce9a5 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -69,6 +69,10 @@ func ReplaceSanitizer() { // Allow icons, emojis, and chroma syntax on span sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span") + // Allow data tables + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table") + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td") + // Allow generally safe attributes generalSafeAttrs := []string{"abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 15a8a589f120c..39ba8b1407571 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -64,7 +64,7 @@ {{end}}
-
+
{{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} {{else if .IsRenderedHTML}} diff --git a/web_src/less/_markdown.less b/web_src/less/_markdown.less index 6bb0bf4328ae1..09c94f0678af5 100644 --- a/web_src/less/_markdown.less +++ b/web_src/less/_markdown.less @@ -473,31 +473,6 @@ box-shadow: inset 0 -1px 0 var(--color-secondary); } - .csv-data td, - .csv-data th { - padding: 5px; - overflow: hidden; - font-size: 12px; - line-height: 1; - text-align: left; - white-space: nowrap; - } - - .csv-data .blob-num { - padding: 10px 8px 9px; - text-align: right; - border: 0; - } - - .csv-data tr { - border-top: 0; - } - - .csv-data th { - font-weight: 600; - border-top: 0; - } - .ui.list .list, ol.ui.list ol, ul.ui.list ul { From 2b4eaceeda5f9fd609edc4059cb5bd9d4a6c8d64 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Fri, 12 Feb 2021 15:37:46 +0000 Subject: [PATCH 09/22] Lint & tests. --- modules/base/csv_test.go | 4 ++-- modules/markup/csv/csv_test.go | 12 ++++-------- services/gitdiff/csv.go | 8 +++----- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/modules/base/csv_test.go b/modules/base/csv_test.go index bd9952e2c1153..e8f201a38a3d0 100644 --- a/modules/base/csv_test.go +++ b/modules/base/csv_test.go @@ -22,7 +22,7 @@ func TestCreateCsvReaderAndGuessDelimiter(t *testing.T) { assert.Equal(t, ';', rd.Comma) } -func TestGuessDelimiter(t *testint.T) { +func TestGuessDelimiter(t *testing.T) { var kases = map[string]rune{ "a": ',', "1,2": ',', @@ -37,4 +37,4 @@ func TestGuessDelimiter(t *testint.T) { for k, v := range kases { assert.EqualValues(t, guessDelimiter([]byte(k)), v) } -} \ No newline at end of file +} diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 4d4e0871e94d4..5438ebdf5ca2a 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -13,14 +13,10 @@ import ( func TestRenderCSV(t *testing.T) { var parser Parser var kases = map[string]string{ - "a": "
a
", - "1,2": "
12
", - "1;2": "
12
", - "1\t2": "
12
", - "1|2": "
12
", - "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", - "\"1,2,3,4\";\"a\nb\"\nc;d": "
1,2,3,4a\nb
cd
", - "
": "
<br/>
", + "a": "
1a
", + "1,2": "
112
", + "1;2\n3;4": "
112
234
", + "
": "
1<br/>
", } for k, v := range kases { diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go index 7ae82cfdda65b..9887c82fefbe8 100644 --- a/services/gitdiff/csv.go +++ b/services/gitdiff/csv.go @@ -74,17 +74,15 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab } func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { - arows, err := baseReader.ReadAll() + a, err := baseReader.ReadAll() if err != nil { return nil, err } - a := arows[:] - brows, err := headReader.ReadAll() + b, err := headReader.ReadAll() if err != nil { return nil, err } - b := brows[:] a2b, b2a := getColumnMapping(a, b) @@ -272,7 +270,7 @@ func countUnmappedColumns(mapping []int) int { return count } -// tryMergeLines maps the seperated line numbers of a git diff. +// tryMergeLines maps the separated line numbers of a git diff. func tryMergeLines(lines []*DiffLine) [][2]int { ids := make([][2]int, len(lines)) From 79bdeca58b96e37273166278476dab3f993dfe8c Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Fri, 12 Feb 2021 22:39:11 +0000 Subject: [PATCH 10/22] Fixed wrong value. --- docs/content/doc/advanced/config-cheat-sheet.en-us.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 9b5c4cbf2df7c..2a8724d732418 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -756,7 +756,7 @@ NB: You must have `DISABLE_ROUTER_LOG` set to `false` for this option to take ef - `PATH`: **""**: The path of git executable. If empty, Gitea searches through the PATH environment. - `DISABLE_DIFF_HIGHLIGHT`: **false**: Disables highlight of added and removed changes. -- `MAX_GIT_DIFF_LINES`: **100**: Max number of lines allowed of a single file in diff view. +- `MAX_GIT_DIFF_LINES`: **1000**: Max number of lines allowed of a single file in diff view. - `MAX_GIT_DIFF_LINE_CHARACTERS`: **5000**: Max character count per line highlighted in diff view. - `MAX_GIT_DIFF_FILES`: **100**: Max number of files shown in diff view. - `COMMITS_RANGE_SIZE`: **50**: Set the default commits range size From db623ab23a4d33459119a1bfb6d3eb1582372d8e Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sat, 13 Feb 2021 11:13:53 +0000 Subject: [PATCH 11/22] Added file size limit for csv rendering. Display csv parser errors in diff. --- custom/conf/app.example.ini | 4 ++ .../doc/advanced/config-cheat-sheet.en-us.md | 4 ++ modules/base/csv.go | 15 ++++ modules/markup/csv/csv.go | 13 +++- modules/setting/setting.go | 9 +++ options/locale/locale_en-US.ini | 4 ++ routers/repo/compare.go | 37 ++++++++-- templates/repo/diff/csv_diff.tmpl | 72 ++++++++++--------- 8 files changed, 114 insertions(+), 44 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index cec7e4255a20f..dc67463a6944d 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -248,6 +248,10 @@ EVENT_SOURCE_UPDATE_TIME = 10s ; Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. ENABLE_RENDER = true +[ui.csv] +; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). +MAX_FILE_SIZE = 524288 + [markdown] ; Render soft line breaks as hard line breaks, which means a single newline character between ; paragraphs will cause a line break and adding trailing whitespace to paragraphs is not diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 2a8724d732418..44865392cd1d5 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -198,6 +198,10 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `ENABLE_RENDER`: **true**: Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. +### UI - CSV Files (`ui.csv`) + +- `MAX_FILE_SIZE`: **524288** (512kb): Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). + ## Markdown (`markdown`) - `ENABLE_HARD_LINE_BREAK_IN_COMMENTS`: **true**: Render soft line breaks as hard line breaks in comments, which diff --git a/modules/base/csv.go b/modules/base/csv.go index c33ead67a6e84..7384f4cf7120e 100644 --- a/modules/base/csv.go +++ b/modules/base/csv.go @@ -7,10 +7,12 @@ package base import ( "bytes" "encoding/csv" + "errors" "regexp" "strings" "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/translation" ) var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) @@ -76,3 +78,16 @@ func scoreDelimiter(lines []string, delim rune) float64 { return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) } + +// FormatCsvError converts csv errors into readable messages. +func FormatCsvError(err error, locale translation.Locale) (string, error) { + var perr *csv.ParseError + if errors.As(err, &perr) { + if perr.Err == csv.ErrFieldCount { + return locale.Tr("repo.error.csv.invalid_field_count", perr.Line), nil + } + return locale.Tr("repo.error.csv.unexpected", perr.Line, perr.Column), nil + } + + return "", err +} diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1b98638011544..f6df428c5a779 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -12,13 +12,14 @@ import ( "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/setting" ) func init() { markup.RegisterParser(Parser{}) } -// Parser implements markup.Parser for orgmode +// Parser implements markup.Parser for csv files type Parser struct { } @@ -34,9 +35,17 @@ func (Parser) Extensions() []string { // Render implements markup.Parser func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - rd := base.CreateCsvReaderAndGuessDelimiter(rawBytes) var tmpBlock bytes.Buffer + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) { + tmpBlock.WriteString("
")
+		tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
+		tmpBlock.WriteString("
") + return tmpBlock.Bytes() + } + + rd := base.CreateCsvReaderAndGuessDelimiter(rawBytes) + writeField := func(element, class, field string) { tmpBlock.WriteString("<") tmpBlock.WriteString(element) diff --git a/modules/setting/setting.go b/modules/setting/setting.go index dd38f5be45dd2..174d284eaa242 100644 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -194,6 +194,10 @@ var ( Enabled bool `ini:"ENABLE_RENDER"` } `ini:"ui.svg"` + CSV struct { + MaxFileSize int64 + } `ini:"ui.csv"` + Admin struct { UserPagingNum int RepoPagingNum int @@ -239,6 +243,11 @@ var ( }{ Enabled: true, }, + CSV: struct { + MaxFileSize int64 + }{ + MaxFileSize: 524288, + }, Admin: struct { UserPagingNum int RepoPagingNum int diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index a996b561726f6..8edc017845ffe 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1906,6 +1906,10 @@ topic.done = Done topic.count_prompt = You can not select more than 25 topics topic.format_prompt = Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long. +error.csv.too_large = Can't render this file because it is too large. +error.csv.unexpected = Can't render this file because it contains an unexpected character in line %d and column %d. +error.csv.invalid_field_count = Can't render this file because it has a wrong number of fields in line %d. + [org] org_name_holder = Organization Name org_full_name_holder = Organization Full Name diff --git a/routers/repo/compare.go b/routers/repo/compare.go index d93ec5dab3a81..92c7c34ae9bad 100644 --- a/routers/repo/compare.go +++ b/routers/repo/compare.go @@ -7,6 +7,7 @@ package repo import ( "bufio" "encoding/csv" + "errors" "fmt" "html" "io/ioutil" @@ -84,17 +85,29 @@ func setCsvCompareContext(ctx *context.Context) { extension := strings.ToLower(filepath.Ext(diffFile.Name)) return extension == ".csv" || extension == ".tsv" } - ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) []*gitdiff.TableDiffSection { + + type CsvDiffResult struct { + Sections []*gitdiff.TableDiffSection + Error string + } + + ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) CsvDiffResult { if diffFile == nil || baseCommit == nil || headCommit == nil { - return nil + return CsvDiffResult{nil, ""} } + errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large")) + csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) { blob, err := c.GetBlobByPath(diffFile.Name) if err != nil { return nil, err } + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() { + return nil, errTooLarge + } + reader, err := blob.DataAsync() if err != nil { return nil, err @@ -109,15 +122,25 @@ func setCsvCompareContext(ctx *context.Context) { return base.CreateCsvReaderAndGuessDelimiter(b), nil } - baseReader, _ := csvReaderFromCommit(baseCommit) - headReader, _ := csvReaderFromCommit(headCommit) + baseReader, err := csvReaderFromCommit(baseCommit) + if err == errTooLarge { + return CsvDiffResult{nil, err.Error()} + } + headReader, err := csvReaderFromCommit(headCommit) + if err == errTooLarge { + return CsvDiffResult{nil, err.Error()} + } sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader) if err != nil { - log.Error("RenderCsvDiff failed: %v", err) - return nil + errMessage, err := base.FormatCsvError(err, ctx.Locale) + if err != nil { + log.Error("RenderCsvDiff failed: %v", err) + return CsvDiffResult{nil, ""} + } + return CsvDiffResult{nil, errMessage} } - return sections + return CsvDiffResult{sections, ""} } } diff --git a/templates/repo/diff/csv_diff.tmpl b/templates/repo/diff/csv_diff.tmpl index fd2aeaed124c3..c4af70461fed9 100644 --- a/templates/repo/diff/csv_diff.tmpl +++ b/templates/repo/diff/csv_diff.tmpl @@ -1,44 +1,46 @@ - - {{$sections := call .root.CreateCsvDiff .file .root.BaseCommit .root.HeadCommit}} - {{if $sections}} - {{range $i, $section := $sections}} - - {{range $j, $row := $section.Rows}} - - {{if and (eq $i 0) (eq $j 0)}} - - {{range $j, $cell := $row.Cells}} - {{if eq $cell.Type 2}} - - {{else if eq $cell.Type 3}} - - {{else if eq $cell.Type 4}} - - {{else}} - - {{end}} + {{$result := call .root.CreateCsvDiff .file .root.BaseCommit .root.HeadCommit}} + {{if $result.Error}} +
{{$result.Error}}
+ {{else if $result.Sections}} +
{{.RowIdx}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}
+ {{range $i, $section := $result.Sections}} + + {{range $j, $row := $section.Rows}} + + {{if and (eq $i 0) (eq $j 0)}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + {{end}} - {{else}} - - {{range $j, $cell := $row.Cells}} - {{if eq $cell.Type 2}} - - {{else if eq $cell.Type 3}} - - {{else if eq $cell.Type 4}} - - {{else}} - - {{end}} + {{end}} + {{else}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + {{end}} {{end}} - - {{end}} - + {{end}} + {{end}} + {{end}} -
{{.RowIdx}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}{{if .RowIdx}}{{.RowIdx}}{{end}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}{{if .RowIdx}}{{.RowIdx}}{{end}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}
+ + {{end}} \ No newline at end of file From f8b902ba3f2e4d1fdb81d0c65b0aa1fe8821ce02 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sat, 13 Feb 2021 12:39:32 +0000 Subject: [PATCH 12/22] Lazy read single file. --- services/gitdiff/csv.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go index 9887c82fefbe8..7f5e1fbbfc738 100644 --- a/services/gitdiff/csv.go +++ b/services/gitdiff/csv.go @@ -7,6 +7,7 @@ package gitdiff import ( "encoding/csv" "errors" + "io" "code.gitea.io/gitea/modules/util" ) @@ -56,18 +57,22 @@ func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { - a, err := reader.ReadAll() + var rows []*TableDiffRow + i := 1 + for { + row, err := reader.Read() if err != nil { + if err == io.EOF { + break + } return nil, err } - - rows := make([]*TableDiffRow, len(a)) - for i, row := range a { cells := make([]*TableDiffCell, len(row)) for j := 0; j < len(row); j++ { cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} } - rows[i] = &TableDiffRow{RowIdx: i + 1, Cells: cells} + rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells}) + i++ } return []*TableDiffSection{{Rows: rows}}, nil From 0753b92bedc8c88a6866816bbde77fd13d4f2873 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sat, 13 Feb 2021 13:38:35 +0000 Subject: [PATCH 13/22] Lazy read rows for full diff. --- modules/base/csv.go | 2 +- services/gitdiff/csv.go | 143 +++++++++++++++++++++++++++++----------- 2 files changed, 106 insertions(+), 39 deletions(-) diff --git a/modules/base/csv.go b/modules/base/csv.go index 7384f4cf7120e..14096d9af5e75 100644 --- a/modules/base/csv.go +++ b/modules/base/csv.go @@ -11,8 +11,8 @@ import ( "regexp" "strings" - "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" ) var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go index 7f5e1fbbfc738..f4310d8772870 100644 --- a/services/gitdiff/csv.go +++ b/services/gitdiff/csv.go @@ -13,6 +13,8 @@ import ( ) const unmappedColumn = -1 +const maxRowsToInspect int = 10 +const minRatioToMatch float32 = 0.8 // TableDiffCellType represents the type of a TableDiffCell. type TableDiffCellType uint8 @@ -43,6 +45,66 @@ type TableDiffSection struct { Rows []*TableDiffRow } +// csvReader wraps a csv.Reader which buffers the first rows. +type csvReader struct { + reader *csv.Reader + buffer [][]string + line int + eof bool +} + +// createCsvReader creates a csvReader and fills the buffer +func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) { + csv := &csvReader{reader: reader} + csv.buffer = make([][]string, bufferRowCount) + for i := 0; i < bufferRowCount && !csv.eof; i++ { + row, err := csv.readNextRow() + if err != nil { + return nil, err + } + csv.buffer[i] = row + } + csv.line = bufferRowCount + return csv, nil +} + +// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned. +func (csv *csvReader) GetRow(row int) ([]string, error) { + if row < len(csv.buffer) { + return csv.buffer[row], nil + } + if csv.eof { + return nil, nil + } + for { + fields, err := csv.readNextRow() + if err != nil { + return nil, err + } + if csv.eof { + return nil, nil + } + csv.line++ + if csv.line-1 == row { + return fields, nil + } + } +} + +func (csv *csvReader) readNextRow() ([]string, error) { + if csv.eof { + return nil, nil + } + row, err := csv.reader.Read() + if err != nil { + if err != io.EOF { + return nil, err + } + csv.eof = true + } + return row, nil +} + // CreateCsvDiff creates a tabular diff based on two CSV readers. func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { if baseReader != nil && headReader != nil { @@ -61,12 +123,12 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab i := 1 for { row, err := reader.Read() - if err != nil { + if err != nil { if err == io.EOF { break } - return nil, err - } + return nil, err + } cells := make([]*TableDiffCell, len(row)) for j := 0; j < len(row); j++ { cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} @@ -79,12 +141,12 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab } func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { - a, err := baseReader.ReadAll() + a, err := createCsvReader(baseReader, maxRowsToInspect) if err != nil { return nil, err } - b, err := headReader.ReadAll() + b, err := createCsvReader(headReader, maxRowsToInspect) if err != nil { return nil, err } @@ -96,34 +158,44 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R columns = len(b2a) + countUnmappedColumns(a2b) } - createDiffRow := func(aline int, bline int) *TableDiffRow { + createDiffRow := func(aline int, bline int) (*TableDiffRow, error) { cells := make([]*TableDiffCell, columns) if aline == 0 || bline == 0 { var ( row []string celltype TableDiffCellType + err error ) if bline == 0 { - row = getRow(a, aline-1) + row, err = a.GetRow(aline - 1) celltype = TableDiffCellDel } else { - row = getRow(b, bline-1) + row, err = b.GetRow(bline - 1) celltype = TableDiffCellAdd } + if err != nil { + return nil, err + } if row == nil { - return nil + return nil, nil } for i := 0; i < len(row); i++ { cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype} } - return &TableDiffRow{RowIdx: bline, Cells: cells} + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil } - arow := getRow(a, aline-1) - brow := getRow(b, bline-1) + arow, err := a.GetRow(aline - 1) + if err != nil { + return nil, err + } + brow, err := b.GetRow(bline - 1) + if err != nil { + return nil, err + } if len(arow) == 0 && len(brow) == 0 { - return nil + return nil, nil } for i := 0; i < len(a2b); i++ { @@ -148,7 +220,7 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R } } - return &TableDiffRow{RowIdx: bline, Cells: cells} + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil } var sections []*TableDiffSection @@ -158,12 +230,18 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R lines := tryMergeLines(section.Lines) for j, line := range lines { if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { - diffRow := createDiffRow(1, 1) + diffRow, err := createDiffRow(1, 1) + if err != nil { + return nil, err + } if diffRow != nil { rows = append(rows, diffRow) } } - diffRow := createDiffRow(line[0], line[1]) + diffRow, err := createDiffRow(line[0], line[1]) + if err != nil { + return nil, err + } if diffRow != nil { rows = append(rows, diffRow) } @@ -178,18 +256,18 @@ func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.R } // getColumnMapping creates a mapping of columns between a and b -func getColumnMapping(a [][]string, b [][]string) ([]int, []int) { - arow := getRow(a, 0) - brow := getRow(b, 0) +func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) { + arow, _ := a.GetRow(0) + brow, _ := b.GetRow(0) a2b := []int{} b2a := []int{} if arow != nil { - a2b = make([]int, len(a[0])) + a2b = make([]int, len(arow)) } if brow != nil { - b2a = make([]int, len(b[0])) + b2a = make([]int, len(brow)) } for i := 0; i < len(b2a); i++ { @@ -221,24 +299,21 @@ func getColumnMapping(a [][]string, b [][]string) ([]int, []int) { } // tryMapColumnsByContent tries to map missing columns by the content of the first lines. -func tryMapColumnsByContent(a [][]string, a2b []int, b [][]string, b2a []int) { - const MaxRows int = 10 - const MinRatio float32 = 0.8 - +func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) { start := 0 for i := 0; i < len(a2b); i++ { if a2b[i] == unmappedColumn { if b2a[start] == unmappedColumn { - rows := util.Min(MaxRows, util.Max(0, util.Min(len(a), len(b))-1)) + rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1)) same := 0 for j := 1; j <= rows; j++ { - acell, ea := getCell(getRow(a, j), i) - bcell, eb := getCell(getRow(b, j), start+1) + acell, ea := getCell(a.buffer[j], i) + bcell, eb := getCell(b.buffer[j], start+1) if ea == nil && eb == nil && acell == bcell { same++ } } - if (float32(same) / float32(rows)) > MinRatio { + if (float32(same) / float32(rows)) > minRatioToMatch { a2b[i] = start + 1 b2a[start+1] = i } @@ -248,14 +323,6 @@ func tryMapColumnsByContent(a [][]string, a2b []int, b [][]string, b2a []int) { } } -// getRow returns the specific row or nil if not present. -func getRow(records [][]string, row int) []string { - if row < len(records) { - return records[row] - } - return nil -} - // getCell returns the specific cell or nil if not present. func getCell(row []string, column int) (string, error) { if column < len(row) { @@ -275,7 +342,7 @@ func countUnmappedColumns(mapping []int) int { return count } -// tryMergeLines maps the separated line numbers of a git diff. +// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. func tryMergeLines(lines []*DiffLine) [][2]int { ids := make([][2]int, len(lines)) From 66ea833cce55ccef8f2da6b19f3336304f54adf3 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 15 Feb 2021 15:25:32 +0000 Subject: [PATCH 14/22] Moved code to csv package. --- modules/{base => csv}/csv.go | 18 +++++++++--------- modules/{base => csv}/csv_test.go | 2 +- modules/markup/csv/csv.go | 4 ++-- routers/repo/compare.go | 5 +++-- 4 files changed, 15 insertions(+), 14 deletions(-) rename modules/{base => csv}/csv.go (79%) rename modules/{base => csv}/csv_test.go (98%) diff --git a/modules/base/csv.go b/modules/csv/csv.go similarity index 79% rename from modules/base/csv.go rename to modules/csv/csv.go index 14096d9af5e75..e3969afbb0e9c 100644 --- a/modules/base/csv.go +++ b/modules/csv/csv.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package base +package csv import ( "bytes" @@ -17,18 +17,18 @@ import ( var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) -// CreateCsvReader creates a CSV reader with the given delimiter. -func CreateCsvReader(rawBytes []byte, delimiter rune) *csv.Reader { +// CreateReader creates a csv.Reader with the given delimiter. +func CreateReader(rawBytes []byte, delimiter rune) *csv.Reader { rd := csv.NewReader(bytes.NewReader(rawBytes)) rd.Comma = delimiter rd.TrimLeadingSpace = true return rd } -// CreateCsvReaderAndGuessDelimiter creates a CSV reader with a guessed delimiter. -func CreateCsvReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader { +// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader. +func CreateReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader { delimiter := guessDelimiter(rawBytes) - return CreateCsvReader(rawBytes, delimiter) + return CreateReader(rawBytes, delimiter) } // guessDelimiter scores the input CSV data against delimiters, and returns the best match. @@ -55,7 +55,7 @@ func guessDelimiter(data []byte) rune { return bestDelim } -// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV +// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV. func scoreDelimiter(lines []string, delim rune) float64 { countTotal := 0 countLineMax := 0 @@ -79,8 +79,8 @@ func scoreDelimiter(lines []string, delim rune) float64 { return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) } -// FormatCsvError converts csv errors into readable messages. -func FormatCsvError(err error, locale translation.Locale) (string, error) { +// FormatError converts csv errors into readable messages. +func FormatError(err error, locale translation.Locale) (string, error) { var perr *csv.ParseError if errors.As(err, &perr) { if perr.Err == csv.ErrFieldCount { diff --git a/modules/base/csv_test.go b/modules/csv/csv_test.go similarity index 98% rename from modules/base/csv_test.go rename to modules/csv/csv_test.go index e8f201a38a3d0..394e4f7a2e663 100644 --- a/modules/base/csv_test.go +++ b/modules/csv/csv_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package base +package csv import ( "testing" diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index f6df428c5a779..430e759eb5323 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -10,7 +10,7 @@ import ( "io" "strconv" - "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" ) @@ -44,7 +44,7 @@ func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, return tmpBlock.Bytes() } - rd := base.CreateCsvReaderAndGuessDelimiter(rawBytes) + rd := csv.CreateReaderAndGuessDelimiter(rawBytes) writeField := func(element, class, field string) { tmpBlock.WriteString("<") diff --git a/routers/repo/compare.go b/routers/repo/compare.go index 92c7c34ae9bad..4f8389c7a85c0 100644 --- a/routers/repo/compare.go +++ b/routers/repo/compare.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" + csv_module "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -119,7 +120,7 @@ func setCsvCompareContext(ctx *context.Context) { return nil, err } - return base.CreateCsvReaderAndGuessDelimiter(b), nil + return csv_module.CreateReaderAndGuessDelimiter(b), nil } baseReader, err := csvReaderFromCommit(baseCommit) @@ -133,7 +134,7 @@ func setCsvCompareContext(ctx *context.Context) { sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader) if err != nil { - errMessage, err := base.FormatCsvError(err, ctx.Locale) + errMessage, err := csv_module.FormatError(err, ctx.Locale) if err != nil { log.Error("RenderCsvDiff failed: %v", err) return CsvDiffResult{nil, ""} From 828a4b9b7b8d7fe7e42a39b631db04fae06a7d5d Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 15 Feb 2021 15:29:40 +0000 Subject: [PATCH 15/22] Fixed method name. --- modules/csv/csv_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go index 394e4f7a2e663..6d2dacc1de202 100644 --- a/modules/csv/csv_test.go +++ b/modules/csv/csv_test.go @@ -10,15 +10,15 @@ import ( "github.com/stretchr/testify/assert" ) -func TestCreateCsvReader(t *testing.T) { - rd := CreateCsvReader([]byte{}, ',') +func TestCreateReader(t *testing.T) { + rd := CreateReader([]byte{}, ',') assert.Equal(t, ',', rd.Comma) } -func TestCreateCsvReaderAndGuessDelimiter(t *testing.T) { +func TestCreateReaderAndGuessDelimiter(t *testing.T) { input := "a;b;c\n1;2;3\n4;5;6" - rd := CreateCsvReaderAndGuessDelimiter([]byte(input)) + rd := CreateReaderAndGuessDelimiter([]byte(input)) assert.Equal(t, ';', rd.Comma) } From 1038cc58799c64c63aa4fae3e5a667ce5d30aac5 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Wed, 10 Mar 2021 22:25:10 +0000 Subject: [PATCH 16/22] Revert unrelated change. --- docs/content/doc/advanced/config-cheat-sheet.en-us.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 2e6c46d6b740a..0088023d7d83e 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -762,7 +762,7 @@ NB: You must have `DISABLE_ROUTER_LOG` set to `false` for this option to take ef - `PATH`: **""**: The path of git executable. If empty, Gitea searches through the PATH environment. - `DISABLE_DIFF_HIGHLIGHT`: **false**: Disables highlight of added and removed changes. -- `MAX_GIT_DIFF_LINES`: **1000**: Max number of lines allowed of a single file in diff view. +- `MAX_GIT_DIFF_LINES`: **100**: Max number of lines allowed of a single file in diff view. - `MAX_GIT_DIFF_LINE_CHARACTERS`: **5000**: Max character count per line highlighted in diff view. - `MAX_GIT_DIFF_FILES`: **100**: Max number of files shown in diff view. - `COMMITS_RANGE_SIZE`: **50**: Set the default commits range size From 8f1701661b95b0c92f1448a636a1b957a77763c6 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Wed, 24 Mar 2021 16:16:17 +0100 Subject: [PATCH 17/22] Added unit tests for various csv changes. --- services/gitdiff/csv_test.go | 119 +++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 services/gitdiff/csv_test.go diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go new file mode 100644 index 0000000000000..17edea582c407 --- /dev/null +++ b/services/gitdiff/csv_test.go @@ -0,0 +1,119 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + "strings" + "testing" + + csv_module "code.gitea.io/gitea/modules/csv" + "code.gitea.io/gitea/modules/setting" + "github.com/stretchr/testify/assert" +) + +func TestCSVDiff(t *testing.T) { + var cases = []struct { + diff string + base string + head string + cells [][2]TableDiffCellType + }{ + // case 0 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -0,0 +1,2 @@ ++col1,col2 ++a,a`, + base: "", + head: "col1,col2\na,a", + cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 1 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,3 @@ + col1,col2 +-a,a ++a,a ++b,b`, + base: "col1,col2\na,a", + head: "col1,col2\na,a\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 2 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,3 +1,2 @@ + col1,col2 +-a,a + b,b`, + base: "col1,col2\na,a\nb,b", + head: "col1,col2\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}}, + }, + // case 3 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ + col1,col2 +-b,b ++b,c`, + base: "col1,col2\nb,b", + head: "col1,col2\nb,c", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}}, + }, + // case 4 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +0,0 @@ +-col1,col2 +-b,c`, + base: "col1,col2\nb,c", + head: "", + cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}}, + }, + } + + for n, c := range cases { + diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff)) + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + var baseReader *csv.Reader + if len(c.base) > 0 { + baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base)) + } + var headReader *csv.Reader + if len(c.head) > 0 { + headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head)) + } + + result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "case %d: should be one section", n) + + section := result[0] + assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells)) + + for i, row := range section.Rows { + assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i) + for j, cell := range row.Cells { + assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j) + } + } + } +} From 3835cf0047e863a0bbfc558e1c9d80f31f13c4ba Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Wed, 24 Mar 2021 16:17:17 +0100 Subject: [PATCH 18/22] Removed line-height. --- web_src/less/_repository.less | 1 - 1 file changed, 1 deletion(-) diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index fee3381c7b291..bab9cec1ad08d 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -1467,7 +1467,6 @@ padding: 5px !important; overflow: hidden; font-size: 12px; - line-height: 1; text-align: left; white-space: nowrap; border: 1px solid var(--color-secondary); From 260dd93029393cb879d9b1108c72941f5aa6ea2b Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 16:20:00 +0200 Subject: [PATCH 19/22] Don't use unthemed color. --- web_src/less/_repository.less | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index bab9cec1ad08d..10e5736787a38 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -1509,7 +1509,7 @@ cursor: pointer; user-select: none; text-align: right; - background: var(--color-white); + background: var(--color-body); border: 0; } } From 15dca097da7f539fb0499209d8a0998d4d524778 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 16:22:00 +0200 Subject: [PATCH 20/22] Added possible @ delimiter. --- modules/csv/csv.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/csv/csv.go b/modules/csv/csv.go index e3969afbb0e9c..332fa9132b99c 100644 --- a/modules/csv/csv.go +++ b/modules/csv/csv.go @@ -41,7 +41,7 @@ func guessDelimiter(data []byte) rune { lines := strings.SplitN(text, "\n", maxLines+1) lines = lines[:util.Min(maxLines, len(lines))] - delimiters := []rune{',', ';', '\t', '|'} + delimiters := []rune{',', ';', '\t', '|', '@'} bestDelim := delimiters[0] bestScore := 0.0 for _, delim := range delimiters { From 925596ecdef99a1e0df3bef7b05cc08bb4801938 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 16:23:00 +0200 Subject: [PATCH 21/22] Changed copyright notice. --- modules/csv/csv.go | 2 +- modules/csv/csv_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/csv/csv.go b/modules/csv/csv.go index 332fa9132b99c..1aa78fdeec76a 100644 --- a/modules/csv/csv.go +++ b/modules/csv/csv.go @@ -1,4 +1,4 @@ -// Copyright 2018 The Gitea Authors. All rights reserved. +// Copyright 2021 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go index 6d2dacc1de202..3a7584e21d2e7 100644 --- a/modules/csv/csv_test.go +++ b/modules/csv/csv_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 The Gitea Authors. All rights reserved. +// Copyright 2021 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. From 05a29aeb58daea8adbf9b7a0878f06b03e314700 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 14:27:00 +0000 Subject: [PATCH 22/22] Encode content as UTF8. --- routers/repo/compare.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/routers/repo/compare.go b/routers/repo/compare.go index 839399e4ffe1b..00f01f13b8714 100644 --- a/routers/repo/compare.go +++ b/routers/repo/compare.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" csv_module "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/git" @@ -120,6 +121,8 @@ func setCsvCompareContext(ctx *context.Context) { return nil, err } + b = charset.ToUTF8WithFallback(b) + return csv_module.CreateReaderAndGuessDelimiter(b), nil }