From 44da60d869578423dea529db62ed613588a2a560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Fri, 19 Oct 2018 11:30:57 +0200 Subject: [PATCH] hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 --- go.mod | 1 - go.sum | 3 -- hugolib/page.go | 53 ++------------------- hugolib/page_bundler_handlers.go | 2 - hugolib/page_content.go | 32 +++++++++---- hugolib/page_test.go | 54 ---------------------- parser/metadecoders/decoder.go | 2 +- parser/metadecoders/yaml.go | 2 +- parser/pageparser/item.go | 45 ++++++++++-------- parser/pageparser/pagelexer.go | 2 + parser/pageparser/pageparser.go | 15 +++++- parser/pageparser/pageparser_intro_test.go | 4 +- 12 files changed, 74 insertions(+), 141 deletions(-) diff --git a/go.mod b/go.mod index 5e498370f1f..aa73284e97c 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,6 @@ require ( golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e // indirect golang.org/x/text v0.3.0 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect - gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0 gopkg.in/yaml.v2 v2.2.1 ) diff --git a/go.sum b/go.sum index 7af553217cd..c41cacfb322 100644 --- a/go.sum +++ b/go.sum @@ -65,7 +65,6 @@ github.com/magefile/mage v1.4.0 h1:RI7B1CgnPAuu2O9lWszwya61RLmfL0KCdo+QyyI/Bhk= github.com/magefile/mage v1.4.0/go.mod h1:IUDi13rsHje59lecXokTfGX0QIzO45uVPlXnJYsXepA= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6 h1:LZhVjIISSbj8qLf2qDPP0D8z0uvOWAW5C85ly5mJW6c= github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6/go.mod h1:oTeZL2KHA7CUX6X+fovmK9OvIOFuqu0TwdQrZjLTh88= github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= github.com/mattn/go-isatty v0.0.4/go.mod 
h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= @@ -144,7 +143,5 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0 h1:POO/ycCATvegFmVuPpQzZFJ+pGZeX22Ufu6fibxDVjU= -gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/hugolib/page.go b/hugolib/page.go index db4ac4e3e3c..2db0fb5d494 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -19,7 +19,6 @@ import ( "errors" "fmt" "reflect" - "unicode" "github.com/gohugoio/hugo/media" _errors "github.com/pkg/errors" @@ -706,55 +705,13 @@ func (p *Page) UniqueID() string { } // for logging +// TODO(bep) 2errors remove func (p *Page) lineNumRawContentStart() int { return bytes.Count(p.frontmatter, []byte("\n")) + 1 } -var ( - internalSummaryDivider = []byte("HUGOMORE42") -) - -// replaceDivider replaces the with an internal value and returns -// whether the contentis truncated or not. -// Note: The content slice will be modified if needed. -func replaceDivider(content, from, to []byte) ([]byte, bool) { - dividerIdx := bytes.Index(content, from) - if dividerIdx == -1 { - return content, false - } - - afterSummary := content[dividerIdx+len(from):] - - // If the raw content has nothing but whitespace after the summary - // marker then the page shouldn't be marked as truncated. 
This check - is simplest against the raw content because different markup engines - (rst and asciidoc in particular) add div and p elements after the - summary marker. - truncated := bytes.IndexFunc(afterSummary, func(r rune) bool { return !unicode.IsSpace(r) }) != -1 - - content = append(content[:dividerIdx], append(to, afterSummary...)...) - - return content, truncated - -} - -// We have to replace the <!--more--> with something that survives all the - // rendering engines. -func (p *Page) replaceDivider(content []byte) []byte { - summaryDivider := helpers.SummaryDivider - if p.Markup == "org" { - summaryDivider = []byte("# more") - } - - replaced, truncated := replaceDivider(content, summaryDivider, internalSummaryDivider) - - p.truncated = truncated - - return replaced -} - -// Returns the page as summary and main if a user defined split is provided. -func (p *Page) setUserDefinedSummaryIfProvided(rawContentCopy []byte) (*summaryContent, error) { +// Returns the page as summary and main. 
+func (p *Page) setUserDefinedSummary(rawContentCopy []byte) (*summaryContent, error) { sc, err := splitUserDefinedSummaryAndContent(p.Markup, rawContentCopy) @@ -1288,10 +1245,10 @@ func (p *Page) prepareForRender() error { return err } - if p.Markup != "html" { + if p.Markup != "html" && p.source.hasSummaryDivider { // Now we know enough to create a summary of the page and count some words - summaryContent, err := p.setUserDefinedSummaryIfProvided(workContentCopy) + summaryContent, err := p.setUserDefinedSummary(workContentCopy) if err != nil { s.Log.ERROR.Printf("Failed to set user defined summary for page %q: %s", p.Path(), err) diff --git a/hugolib/page_bundler_handlers.go b/hugolib/page_bundler_handlers.go index 2d3a6a93041..2ab0ebafed5 100644 --- a/hugolib/page_bundler_handlers.go +++ b/hugolib/page_bundler_handlers.go @@ -276,8 +276,6 @@ func (c *contentHandlers) handlePageContent() contentHandler { p.workContent = helpers.Emojify(p.workContent) } - // TODO(bep) 2errors - p.workContent = p.replaceDivider(p.workContent) p.workContent = p.renderContent(p.workContent) tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent) diff --git a/hugolib/page_content.go b/hugolib/page_content.go index 7d5e3e8d674..0d715f38bf9 100644 --- a/hugolib/page_content.go +++ b/hugolib/page_content.go @@ -23,6 +23,10 @@ import ( "github.com/gohugoio/hugo/parser/pageparser" ) +var ( + internalSummaryDivider = []byte("HUGOMORE42") +) + // The content related items on a Page. type pageContent struct { renderable bool @@ -41,11 +45,12 @@ type pageContent struct { } type rawPageContent struct { + hasSummaryDivider bool + // The AST of the parsed page. Contains information about: // shortcBackup3odes, front matter, summary indicators. // TODO(bep) 2errors add this to a new rawPagecContent struct // with frontMatterItem (pos) etc. 
- // * also Result.Iterator, Result.Source // * RawContent, RawContentWithoutFrontMatter parsed pageparser.Result } @@ -71,16 +76,15 @@ Loop: it := iter.Next() switch { - case it.Typ == pageparser.TypeIgnore: - case it.Typ == pageparser.TypeHTMLComment: + case it.Type == pageparser.TypeIgnore: + case it.Type == pageparser.TypeHTMLComment: // Ignore. This is only a leading Front matter comment. - case it.Typ == pageparser.TypeHTMLDocument: + case it.Type == pageparser.TypeHTMLDocument: // This is HTML only. No shortcode, front matter etc. p.renderable = false result.Write(it.Val) - // TODO(bep) 2errors commented out frontmatter case it.IsFrontMatter(): - f := metadecoders.FormatFromFrontMatterType(it.Typ) + f := metadecoders.FormatFromFrontMatterType(it.Type) m, err := metadecoders.UnmarshalToMap(it.Val, f) if err != nil { return err @@ -92,11 +96,23 @@ Loop: if !p.shouldBuild() { // Nothing more to do. return nil + } + case it.Type == pageparser.TypeLeadSummaryDivider, it.Type == pageparser.TypeSummaryDividerOrg: + result.Write(internalSummaryDivider) + p.source.hasSummaryDivider = true + // Need to determine if the page is truncated. 
+ f := func(item pageparser.Item) bool { + if item.IsNonWhitespace() { + p.truncated = true + + // Done + return false + } + return true } + iter.PeekWalk(f) - //case it.Typ == pageparser.TypeLeadSummaryDivider, it.Typ == pageparser.TypeSummaryDividerOrg: - // TODO(bep) 2errors store if divider is there and use that to determine if replace or not // Handle shortcode case it.IsLeftShortcodeDelim(): // let extractShortcode handle left delim (will do so recursively) diff --git a/hugolib/page_test.go b/hugolib/page_test.go index bb820b86e3b..7359140fcf7 100644 --- a/hugolib/page_test.go +++ b/hugolib/page_test.go @@ -1272,60 +1272,6 @@ func TestSliceToLower(t *testing.T) { } } -func TestReplaceDivider(t *testing.T) { - t.Parallel() - - tests := []struct { - content string - from string - to string - expectedContent string - expectedTruncated bool - }{ - {"none", "a", "b", "none", false}, - {"summary content", "", "HUGO", "summary HUGO content", true}, - {"summary\n\ndivider", "divider", "HUGO", "summary\n\nHUGO", false}, - {"summary\n\ndivider\n\r", "divider", "HUGO", "summary\n\nHUGO\n\r", false}, - } - - for i, test := range tests { - replaced, truncated := replaceDivider([]byte(test.content), []byte(test.from), []byte(test.to)) - - if truncated != test.expectedTruncated { - t.Fatalf("[%d] Expected truncated to be %t, was %t", i, test.expectedTruncated, truncated) - } - - if string(replaced) != test.expectedContent { - t.Fatalf("[%d] Expected content to be %q, was %q", i, test.expectedContent, replaced) - } - } -} - -func BenchmarkReplaceDivider(b *testing.B) { - divider := "HUGO_DIVIDER" - from, to := []byte(divider), []byte("HUGO_REPLACED") - - withDivider := make([][]byte, b.N) - noDivider := make([][]byte, b.N) - - for i := 0; i < b.N; i++ { - withDivider[i] = []byte(strings.Repeat("Summary ", 5) + "\n" + divider + "\n" + strings.Repeat("Word ", 300)) - noDivider[i] = []byte(strings.Repeat("Word ", 300)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, t1 
:= replaceDivider(withDivider[i], from, to) - _, t2 := replaceDivider(noDivider[i], from, to) - if !t1 { - b.Fatal("Should be truncated") - } - if t2 { - b.Fatal("Should not be truncated") - } - } -} - func TestPagePaths(t *testing.T) { t.Parallel() diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go index 7527d7a08e1..280361a8411 100644 --- a/parser/metadecoders/decoder.go +++ b/parser/metadecoders/decoder.go @@ -20,7 +20,7 @@ import ( "github.com/chaseadamsio/goorgeous" "github.com/gohugoio/hugo/parser/pageparser" "github.com/pkg/errors" - yaml "gopkg.in/yaml.v1" + yaml "gopkg.in/yaml.v2" ) type Format string diff --git a/parser/metadecoders/yaml.go b/parser/metadecoders/yaml.go index 3a520ac07ab..21b23a9fd0a 100644 --- a/parser/metadecoders/yaml.go +++ b/parser/metadecoders/yaml.go @@ -19,7 +19,7 @@ import ( "fmt" "github.com/spf13/cast" - yaml "gopkg.in/yaml.v1" + yaml "gopkg.in/yaml.v2" ) // HandleYAMLData unmarshals YAML-encoded datum and returns a Go interface diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index d97fed734c8..afc3b5fab32 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -13,10 +13,13 @@ package pageparser -import "fmt" +import ( + "bytes" + "fmt" +) type Item struct { - Typ ItemType + Type ItemType pos pos Val []byte } @@ -28,65 +31,69 @@ func (i Item) ValStr() string { } func (i Item) IsText() bool { - return i.Typ == tText + return i.Type == tText +} + +func (i Item) IsNonWhitespace() bool { + return len(bytes.TrimSpace(i.Val)) > 0 } func (i Item) IsShortcodeName() bool { - return i.Typ == tScName + return i.Type == tScName } func (i Item) IsLeftShortcodeDelim() bool { - return i.Typ == tLeftDelimScWithMarkup || i.Typ == tLeftDelimScNoMarkup + return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup } func (i Item) IsRightShortcodeDelim() bool { - return i.Typ == tRightDelimScWithMarkup || i.Typ == tRightDelimScNoMarkup + return i.Type == 
tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup } func (i Item) IsShortcodeClose() bool { - return i.Typ == tScClose + return i.Type == tScClose } func (i Item) IsShortcodeParam() bool { - return i.Typ == tScParam + return i.Type == tScParam } func (i Item) IsShortcodeParamVal() bool { - return i.Typ == tScParamVal + return i.Type == tScParamVal } func (i Item) IsShortcodeMarkupDelimiter() bool { - return i.Typ == tLeftDelimScWithMarkup || i.Typ == tRightDelimScWithMarkup + return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup } func (i Item) IsFrontMatter() bool { - return i.Typ >= TypeFrontMatterYAML && i.Typ <= TypeFrontMatterORG + return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG } func (i Item) IsDone() bool { - return i.Typ == tError || i.Typ == tEOF + return i.Type == tError || i.Type == tEOF } func (i Item) IsEOF() bool { - return i.Typ == tEOF + return i.Type == tEOF } func (i Item) IsError() bool { - return i.Typ == tError + return i.Type == tError } func (i Item) String() string { switch { - case i.Typ == tEOF: + case i.Type == tEOF: return "EOF" - case i.Typ == tError: + case i.Type == tError: return string(i.Val) - case i.Typ > tKeywordMarker: + case i.Type > tKeywordMarker: return fmt.Sprintf("<%s>", i.Val) case len(i.Val) > 50: - return fmt.Sprintf("%v:%.20q...", i.Typ, i.Val) + return fmt.Sprintf("%v:%.20q...", i.Type, i.Val) } - return fmt.Sprintf("%v:[%s]", i.Typ, i.Val) + return fmt.Sprintf("%v:[%s]", i.Type, i.Val) } type ItemType int diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 7768b0b2fb8..a6a26016b01 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -235,6 +235,7 @@ func lexMainSection(l *pageLexer) stateFunc { } l.summaryDividerChecked = true l.pos += pos(len(summaryDivider)) + //l.consumeCRLF() l.emit(TypeLeadSummaryDivider) } else if l.hasPrefix(summaryDividerOrg) { if l.pos > l.start { @@ -242,6 +243,7 @@ func 
lexMainSection(l *pageLexer) stateFunc { } l.summaryDividerChecked = true l.pos += pos(len(summaryDividerOrg)) + //l.consumeCRLF() l.emit(TypeSummaryDividerOrg) } } diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go index b4cdef75ca1..bc6f55dd81c 100644 --- a/parser/pageparser/pageparser.go +++ b/parser/pageparser/pageparser.go @@ -86,7 +86,7 @@ func (t *Iterator) Backup() { // check for non-error and non-EOF types coming next func (t *Iterator) IsValueNext() bool { i := t.Peek() - return i.Typ != tError && i.Typ != tEOF + return i.Type != tError && i.Type != tEOF } // look at, but do not consume, the next item @@ -95,12 +95,23 @@ func (t *Iterator) Peek() Item { return t.l.items[t.lastPos+1] } +// PeekWalk will feed the next items in the iterator to walkFn +// until it returns false. +func (t *Iterator) PeekWalk(walkFn func(item Item) bool) { + for i := t.lastPos + 1; i < pos(len(t.l.items)); i++ { + item := t.l.items[i] + if !walkFn(item) { + break + } + } +} + // Consume is a convencience method to consume the next n tokens, // but back off Errors and EOF. 
func (t *Iterator) Consume(cnt int) { for i := 0; i < cnt; i++ { token := t.Next() - if token.Typ == tError || token.Typ == tEOF { + if token.Type == tError || token.Type == tEOF { t.Backup() break } diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index bfd19c250c3..850254ac7b6 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -91,7 +91,7 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items [] for { item := t.Next() items = append(items, item) - if item.Typ == tEOF || item.Typ == tError { + if item.Type == tEOF || item.Type == tError { break } } @@ -104,7 +104,7 @@ func equal(i1, i2 []Item) bool { return false } for k := range i1 { - if i1[k].Typ != i2[k].Typ { + if i1[k].Type != i2[k].Type { return false } if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {