Skip to content

Commit

Permalink
hugolib: Integrate new page parser
Browse files Browse the repository at this point in the history
  • Loading branch information
bep committed Oct 18, 2018
1 parent 057b16b commit 7074568
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 94 deletions.
46 changes: 4 additions & 42 deletions hugolib/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ type Page struct {
contentv template.HTML
summary template.HTML
TableOfContents template.HTML

// Passed to the shortcodes
pageWithoutContent *PageWithoutContent

Expand All @@ -161,7 +162,6 @@ type Page struct {

extension string
contentType string
renderable bool

Layout string

Expand All @@ -171,13 +171,8 @@ type Page struct {

linkTitle string

frontmatter []byte

// rawContent is the raw content read from the content file.
rawContent []byte

// workContent is a copy of rawContent that may be mutated during site build.
workContent []byte
// Content items.
pageContent

// whether the content is in a CJK language.
isCJKLanguage bool
Expand Down Expand Up @@ -1756,39 +1751,6 @@ func (p *Page) shouldRenderTo(f output.Format) bool {
return found
}

// parse reads the full page source from reader, splits it into front
// matter and content, and applies the decoded metadata to the page.
//
// In order it:
//   - runs the source through parser.ReadFrom;
//   - records renderability, the raw front matter bytes, the raw
//     content bytes and the source language;
//   - decodes the front matter metadata (missing front matter is
//     treated as empty);
//   - attaches Git info for the page when the owning site has it;
//   - delegates to p.update to apply the metadata.
//
// Returns an error if reading or front matter decoding fails.
func (p *Page) parse(reader io.Reader) error {
	psr, err := parser.ReadFrom(reader)

	if err != nil {
		return err
	}

	p.renderable = psr.IsRenderable()
	p.frontmatter = psr.FrontMatter()
	p.rawContent = psr.Content()
	p.lang = p.Source.File.Lang()

	meta, err := psr.Metadata()
	if err != nil {
		return _errors.Wrap(err, "error in front matter")
	}
	if meta == nil {
		// missing frontmatter equivalent to empty frontmatter
		meta = map[string]interface{}{}
	}

	if p.s != nil && p.s.owner != nil {
		gi, enabled := p.s.owner.gitInfo.forPage(p)
		if gi != nil {
			p.GitInfo = gi
		} else if enabled {
			// Git info was requested but none found for this page; warn
			// rather than fail the build.
			p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
		}
	}

	return p.update(meta)
}

// RawContent returns the page's raw, unprocessed content as a string.
func (p *Page) RawContent() string {
	raw := p.rawContent
	return string(raw)
}
Expand Down Expand Up @@ -1871,7 +1833,7 @@ func (p *Page) SaveSource() error {
// TODO(bep) lazy consolidate
func (p *Page) processShortcodes() error {
p.shortcodeState = newShortcodeHandler(p)
tmpContent, err := p.shortcodeState.extractShortcodes(p.workContent, p.withoutContent())
tmpContent, err := p.shortcodeState.extractShortcodes(p.parsed.Tokens(), p.withoutContent())
if err != nil {
return err
}
Expand Down
83 changes: 83 additions & 0 deletions hugolib/page_content.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hugolib

import (
"io"

"github.com/gohugoio/hugo/parser"
"github.com/gohugoio/hugo/parser/pageparser"
"github.com/pkg/errors"
)

// pageContent holds the content-related state of a Page: the raw and
// working byte slices plus the parsed AST of the page source. It is
// embedded in Page.
type pageContent struct {
	// renderable mirrors parser.Result.IsRenderable for this page.
	renderable bool

	// frontmatter holds the raw (not yet decoded) front matter bytes.
	frontmatter []byte

	// rawContent is the raw content read from the content file.
	rawContent []byte

	// workContent is a copy of rawContent that may be mutated during site build.
	workContent []byte

	// The AST of the parsed page. Contains information about:
	// shortcodes, front matter, summary indicators.
	parsed pageparser.Result
}

// parse reads the page source from reader, runs it through both the
// legacy parser and the new pageparser, decodes the front matter
// metadata and applies it to the page via p.update.
//
// reader must also implement io.ReadSeeker so the source can be
// rewound between the two parse passes.
func (p *Page) parse(reader io.Reader) error {
	// TODO(bep) 2errors consolidate when done
	rs, ok := reader.(io.ReadSeeker)
	if !ok {
		// Previously an unconditional type assertion, which would panic
		// for a non-seekable reader; return a proper error instead.
		return errors.New("page parse: reader must implement io.ReadSeeker")
	}

	// First pass: the legacy parser providing front matter, content and
	// renderability.
	psr, err := parser.ReadFrom(rs)
	if err != nil {
		return err
	}

	// Rewind for the second pass. The Seek error was previously
	// ignored; io.SeekStart replaces the bare 0 whence constant.
	if _, err := rs.Seek(0, io.SeekStart); err != nil {
		return errors.Wrap(err, "failed to rewind page content reader")
	}

	// Second pass: the new page parser, producing the AST used by e.g.
	// shortcode extraction.
	psr2, err := pageparser.ReadFrom(rs)
	if err != nil {
		return err
	}

	p.parsed = psr2

	p.renderable = psr.IsRenderable()
	p.frontmatter = psr.FrontMatter()
	p.rawContent = psr.Content()
	p.lang = p.Source.File.Lang()

	meta, err := psr.Metadata()
	if err != nil {
		return errors.Wrap(err, "error in front matter")
	}
	if meta == nil {
		// Missing front matter is equivalent to empty front matter.
		meta = map[string]interface{}{}
	}

	if p.s != nil && p.s.owner != nil {
		gi, enabled := p.s.owner.gitInfo.forPage(p)
		if gi != nil {
			p.GitInfo = gi
		} else if enabled {
			// Git info is enabled but missing for this page; warn rather
			// than fail the build.
			p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
		}
	}

	return p.update(meta)
}
14 changes: 1 addition & 13 deletions hugolib/shortcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -615,19 +615,7 @@ Loop:

var shortCodeStart = []byte("{{")

func (s *shortcodeHandler) extractShortcodes(input []byte, p *PageWithoutContent) (string, error) {

startIdx := bytes.Index(input, shortCodeStart)

// short cut for docs with no shortcodes
if startIdx < 0 {
return string(input), nil
}

// the parser takes a string;
// since this is an internal API, it could make sense to use the mutable []byte all the way, but
// it seems that the time isn't really spent in the byte copy operations, and the impl. gets a lot cleaner
pt := pageparser.ParseFrom(input, startIdx)
func (s *shortcodeHandler) extractShortcodes(pt *pageparser.Tokens, p *PageWithoutContent) (string, error) {

result := bp.GetBuffer()
defer bp.PutBuffer(result)
Expand Down
14 changes: 7 additions & 7 deletions hugolib/shortcode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,11 @@ func TestExtractShortcodes(t *testing.T) {
expectErrorMsg string
}{
{"text", "Some text.", "map[]", "Some text.", ""},
{"invalid right delim", "{{< tag }}", "", false, ":4:.*unrecognized character.*}"},
{"invalid close", "\n{{< /tag >}}", "", false, ":5:.*got closing shortcode, but none is open"},
{"invalid close2", "\n\n{{< tag >}}{{< /anotherTag >}}", "", false, ":6: closing tag for shortcode 'anotherTag' does not match start tag"},
{"unterminated quote 1", `{{< figure src="im caption="S" >}}`, "", false, ":4:.got pos.*"},
{"unterminated quote 1", `{{< figure src="im" caption="S >}}`, "", false, ":4:.*unterm.*}"},
{"invalid right delim", "{{< tag }}", "", false, ":8:.*unrecognized character.*}"},
{"invalid close", "\n{{< /tag >}}", "", false, ":9:.*got closing shortcode, but none is open"},
{"invalid close2", "\n\n{{< tag >}}{{< /anotherTag >}}", "", false, ":10: closing tag for shortcode 'anotherTag' does not match start tag"},
{"unterminated quote 1", `{{< figure src="im caption="S" >}}`, "", false, ":8:.got pos.*"},
{"unterminated quote 1", `{{< figure src="im" caption="S >}}`, "", false, ":8:.*unterm.*}"},
{"one shortcode, no markup", "{{< tag >}}", "", testScPlaceholderRegexp, ""},
{"one shortcode, markup", "{{% tag %}}", "", testScPlaceholderRegexp, ""},
{"one pos param", "{{% tag param1 %}}", `tag([\"param1\"], true){[]}"]`, testScPlaceholderRegexp, ""},
Expand Down Expand Up @@ -405,7 +405,7 @@ func TestExtractShortcodes(t *testing.T) {
fmt.Sprintf("Hello %sworld%s. And that's it.", testScPlaceholderRegexp, testScPlaceholderRegexp), ""},
} {

p, _ := pageFromString(simplePage, "simple.md", func(templ tpl.TemplateHandler) error {
p, _ := pageFromString(simplePage+this.input, "simple.md", func(templ tpl.TemplateHandler) error {
templ.AddTemplate("_internal/shortcodes/tag.html", `tag`)
templ.AddTemplate("_internal/shortcodes/sc1.html", `sc1`)
templ.AddTemplate("_internal/shortcodes/sc2.html", `sc2`)
Expand All @@ -424,7 +424,7 @@ func TestExtractShortcodes(t *testing.T) {
return fmt.Sprintf("HAHA%s-%dHBHB", shortcodePlaceholderPrefix, counter)
}

content, err := s.extractShortcodes([]byte(this.input), p.withoutContent())
content, err := s.extractShortcodes(p.parsed.Tokens(), p.withoutContent())

if b, ok := this.expect.(bool); ok && !b {
if err == nil {
Expand Down
2 changes: 2 additions & 0 deletions parser/pageparser/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ type Item struct {
Val []byte
}

// Items is a slice of lexed items.
type Items []Item

// ValStr returns the item's value as a string.
func (i Item) ValStr() string {
	return string(i.Val)
}
Expand Down
19 changes: 19 additions & 0 deletions parser/pageparser/page_tokens_getters.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pageparser

// GetFrontMatter returns the unmarshalled frontmatter data.
//
// NOTE(review): currently an unimplemented stub — it ignores items and
// always returns (nil, nil), so callers receive no front matter until
// the extraction from the parsed items is implemented.
func GetFrontMatter(items Items) (map[string]interface{}, error) {
	return nil, nil
}
44 changes: 23 additions & 21 deletions parser/pageparser/pagelexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,23 @@ type pageLexer struct {
pos pos // input position
start pos // item start position
width pos // width of last element
lastPos pos // position of the last item returned by nextItem

contentSections int

lexerShortcodeState

// items delivered to client
items []Item
items Items
}

// Tokens wraps the lexer's collected items in a *Tokens iterator,
// satisfying the Result interface.
func (l *pageLexer) Tokens() *Tokens {
	t := &Tokens{lexer: l, items: l.items}
	return t
}

// Items returns the slice of items produced by the lexer.
func (l *pageLexer) Items() Items {
	return l.items
}

// note: the input position here is normally 0 (start), but
Expand All @@ -79,6 +88,10 @@ func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLe
return lexer
}

// newTokens wraps the lexer's collected items in a *Tokens iterator.
// It delegates to Tokens so the construction is written in exactly one
// place instead of being duplicated verbatim.
func (l *pageLexer) newTokens() *Tokens {
	return l.Tokens()
}

// main loop
func (l *pageLexer) run() *pageLexer {
for l.state = l.stateStart; l.state != nil; {
Expand Down Expand Up @@ -160,25 +173,12 @@ func (l *pageLexer) ignore() {

var lf = []byte("\n")

// lineNum reports the 1-based line number of the last item returned —
// nice to have in error logs.
func (l *pageLexer) lineNum() int {
	seen := l.input[:l.lastPos]
	return 1 + bytes.Count(seen, lf)
}

// errorf appends a tError item carrying the formatted message and
// returns nil, which terminates the parser's state loop.
func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
	msg := fmt.Sprintf(format, args...)
	l.items = append(l.items, Item{tError, l.start, []byte(msg)})
	return nil
}

// nextItem consumes and returns the next item, recording its position
// as the last position handed out.
func (l *pageLexer) nextItem() Item {
	next := l.items[0]
	l.lastPos = next.pos
	l.items = l.items[1:]
	return next
}

func (l *pageLexer) consumeCRLF() bool {
var consumed bool
for _, r := range crLf {
Expand Down Expand Up @@ -258,15 +258,16 @@ LOOP:
case r == '#':
return lexFrontMatterOrgMode
case !isSpace(r) && !isEndOfLine(r):
// No front matter.
if r == '<' {
l.emit(tHTMLLead)
// Not need to look further. Hugo treats this as plain HTML,
// no front matter, no shortcodes, no nothing.
l.pos = pos(len(l.input))
l.emit(tText)
break LOOP

}
return l.errorf("failed to detect front matter type; got unknown identifier %q", r)
break LOOP
}
}

Expand Down Expand Up @@ -366,18 +367,19 @@ LOOP:

}

// printCurrentInput dumps the unconsumed remainder of the lexer input
// to stdout — a debugging aid only.
func (l *pageLexer) printCurrentInput() {
	// Terminate with a newline so successive debug dumps don't run
	// together on one line (the original Printf omitted it).
	fmt.Printf("input[%d:]: %q\n", l.pos, string(l.input[l.pos:]))
}

// Handle YAML or TOML front matter.
func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc {

for i := 0; i < 2; i++ {
if r := l.next(); r != delimr {
return l.errorf("invalid %s delimiter", name)
}
}

if !l.consumeCRLF() {
return l.errorf("invalid %s delimiter", name)
}

// We don't care about the delimiters.
l.ignore()

Expand Down
Loading

0 comments on commit 7074568

Please sign in to comment.