Skip to content

Commit

Permalink
parser/pageparser: File renames and splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
bep committed Oct 17, 2018
1 parent 8b088e1 commit 420198d
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 165 deletions.
103 changes: 103 additions & 0 deletions parser/pageparser/item.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pageparser

import "fmt"

// Item is a token produced by the page lexer. Val holds the raw text of
// the token; typ and pos are internal to the package.
type Item struct {
typ itemType
pos pos
Val string
}

// IsText reports whether the item is plain text, i.e. anything outside
// the shortcodes.
func (i Item) IsText() bool {
return i.typ == tText
}

// IsShortcodeName reports whether the item is a shortcode name token.
func (i Item) IsShortcodeName() bool {
return i.typ == tScName
}

// IsLeftShortcodeDelim reports whether the item is a left shortcode
// delimiter, of either the markup or the no-markup variant.
func (i Item) IsLeftShortcodeDelim() bool {
return i.typ == tLeftDelimScWithMarkup || i.typ == tLeftDelimScNoMarkup
}

// IsRightShortcodeDelim reports whether the item is a right shortcode
// delimiter, of either the markup or the no-markup variant.
func (i Item) IsRightShortcodeDelim() bool {
return i.typ == tRightDelimScWithMarkup || i.typ == tRightDelimScNoMarkup
}

// IsShortcodeClose reports whether the item is a shortcode closing tag token.
func (i Item) IsShortcodeClose() bool {
return i.typ == tScClose
}

// IsShortcodeParam reports whether the item is a shortcode parameter token.
func (i Item) IsShortcodeParam() bool {
return i.typ == tScParam
}

// IsShortcodeParamVal reports whether the item is a shortcode parameter
// value token.
func (i Item) IsShortcodeParamVal() bool {
return i.typ == tScParamVal
}

// IsShortcodeMarkupDelimiter reports whether the item is a shortcode
// delimiter (left or right) of the "with markup" variant.
func (i Item) IsShortcodeMarkupDelimiter() bool {
return i.typ == tLeftDelimScWithMarkup || i.typ == tRightDelimScWithMarkup
}

// IsDone reports whether the item terminates the token stream: either an
// error or end of input.
func (i Item) IsDone() bool {
return i.typ == tError || i.typ == tEOF
}

// IsEOF reports whether the item marks the end of input.
func (i Item) IsEOF() bool {
return i.typ == tEOF
}

// IsError reports whether the item is an error token; for error tokens,
// Val holds the error text.
func (i Item) IsError() bool {
return i.typ == tError
}

// String returns a compact, human-readable form of the item, mainly for
// debugging and error messages: "EOF" for end of input, the raw error text
// for errors, "<Val>" for keyword items, and "[Val]" otherwise (long values
// are quoted and truncated to 20 characters).
func (i Item) String() string {
if i.typ == tEOF {
return "EOF"
}
if i.typ == tError {
return i.Val
}
if i.typ > tKeywordMarker {
return fmt.Sprintf("<%s>", i.Val)
}
if len(i.Val) > 20 {
return fmt.Sprintf("%.20q...", i.Val)
}
return fmt.Sprintf("[%s]", i.Val)
}

// itemType identifies the type of a lexed Item.
type itemType int

const (
// tError is an error token (Val holds the error text); tEOF marks end of input.
tError itemType = iota
tEOF

// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
tLeftDelimScWithMarkup
tRightDelimScWithMarkup
tScClose
tScName
tScParam
tScParamVal

//itemIdentifier
tText // plain text, used for everything outside the shortcodes

// preserved for later - keywords come after this
tKeywordMarker
)
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.).
// This implementation is highly inspired by the great talk given by Rob Pike
// called "Lexical Scanning in Go" (it's on YouTube — search for it).
// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
package pageparser

import (
Expand All @@ -20,171 +24,9 @@ import (
"unicode/utf8"
)

// The lexical scanning below is highly inspired by the great talk given by
// Rob Pike called "Lexical Scanning in Go" (it's on YouTube, Google it!).
// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html

// parsing

type Tokens struct {
lexer *pagelexer
token [3]Item // 3-item look-ahead is what we currently need
peekCount int
}

func (t *Tokens) Next() Item {
if t.peekCount > 0 {
t.peekCount--
} else {
t.token[0] = t.lexer.nextItem()
}
return t.token[t.peekCount]
}

// backs up one token.
func (t *Tokens) Backup() {
t.peekCount++
}

// backs up two tokens.
func (t *Tokens) Backup2(t1 Item) {
t.token[1] = t1
t.peekCount = 2
}

// backs up three tokens.
func (t *Tokens) Backup3(t2, t1 Item) {
t.token[1] = t1
t.token[2] = t2
t.peekCount = 3
}

// check for non-error and non-EOF types coming next
func (t *Tokens) IsValueNext() bool {
i := t.Peek()
return i.typ != tError && i.typ != tEOF
}

// look at, but do not consume, the next item
// repeated, sequential calls will return the same item
func (t *Tokens) Peek() Item {
if t.peekCount > 0 {
return t.token[t.peekCount-1]
}
t.peekCount = 1
t.token[0] = t.lexer.nextItem()
return t.token[0]
}

// Consume is a convencience method to consume the next n tokens,
// but back off Errors and EOF.
func (t *Tokens) Consume(cnt int) {
for i := 0; i < cnt; i++ {
token := t.Next()
if token.typ == tError || token.typ == tEOF {
t.Backup()
break
}
}
}

// LineNumber returns the current line number. Used for logging.
func (t *Tokens) LineNumber() int {
return t.lexer.lineNum()
}

// lexical scanning

// position (in bytes)
type pos int

type Item struct {
typ itemType
pos pos
Val string
}

func (i Item) IsText() bool {
return i.typ == tText
}

func (i Item) IsShortcodeName() bool {
return i.typ == tScName
}

func (i Item) IsLeftShortcodeDelim() bool {
return i.typ == tLeftDelimScWithMarkup || i.typ == tLeftDelimScNoMarkup
}

func (i Item) IsRightShortcodeDelim() bool {
return i.typ == tRightDelimScWithMarkup || i.typ == tRightDelimScNoMarkup
}

func (i Item) IsShortcodeClose() bool {
return i.typ == tScClose
}

func (i Item) IsShortcodeParam() bool {
return i.typ == tScParam
}

func (i Item) IsShortcodeParamVal() bool {
return i.typ == tScParamVal
}

func (i Item) IsShortcodeMarkupDelimiter() bool {
return i.typ == tLeftDelimScWithMarkup || i.typ == tRightDelimScWithMarkup
}

func (i Item) IsDone() bool {
return i.typ == tError || i.typ == tEOF
}

func (i Item) IsEOF() bool {
return i.typ == tEOF
}

func (i Item) IsError() bool {
return i.typ == tError
}

func (i Item) String() string {
switch {
case i.typ == tEOF:
return "EOF"
case i.typ == tError:
return i.Val
case i.typ > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
case len(i.Val) > 20:
return fmt.Sprintf("%.20q...", i.Val)
}
return fmt.Sprintf("[%s]", i.Val)
}

type itemType int

const (
tError itemType = iota
tEOF

// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
tLeftDelimScWithMarkup
tRightDelimScWithMarkup
tScClose
tScName
tScParam
tScParamVal

//itemIdentifier
tText // plain text, used for everything outside the shortcodes

// preserved for later - keywords come after this
tKeywordMarker
)

const eof = -1

// returns the next state in scanner.
Expand Down Expand Up @@ -217,12 +59,12 @@ func Parse(s string) *Tokens {
}

func ParseFrom(s string, from int) *Tokens {
return &Tokens{lexer: newShortcodeLexer("default", s, pos(from))}
return &Tokens{lexer: newPageLexer("default", s, pos(from))}
}

// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
func newShortcodeLexer(name, input string, inputPosition pos) *pagelexer {
func newPageLexer(name, input string, inputPosition pos) *pagelexer {
lexer := &pagelexer{
name: name,
input: input,
Expand Down
87 changes: 87 additions & 0 deletions parser/pageparser/pageparser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.).
// This implementation is highly inspired by the great talk given by Rob Pike
// called "Lexical Scanning in Go" (it's on YouTube — search for it).
// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
package pageparser

// This file holds the token-stream layer built on top of the lexer.

// Tokens is a token stream over the page lexer, with a fixed-size
// look-ahead buffer and backup support.
type Tokens struct {
lexer *pagelexer
token [3]Item // 3-item look-ahead is what we currently need
peekCount int
}

// Next returns the next token, re-delivering any previously backed-up or
// peeked tokens before pulling fresh ones from the lexer.
func (t *Tokens) Next() Item {
if t.peekCount == 0 {
t.token[0] = t.lexer.nextItem()
return t.token[0]
}
t.peekCount--
return t.token[t.peekCount]
}

// backs up one token.
func (t *Tokens) Backup() {
t.peekCount++
}

// backs up two tokens.
func (t *Tokens) Backup2(t1 Item) {
t.token[1] = t1
t.peekCount = 2
}

// backs up three tokens.
func (t *Tokens) Backup3(t2, t1 Item) {
t.token[1] = t1
t.token[2] = t2
t.peekCount = 3
}

// check for non-error and non-EOF types coming next
func (t *Tokens) IsValueNext() bool {
i := t.Peek()
return i.typ != tError && i.typ != tEOF
}

// look at, but do not consume, the next item
// repeated, sequential calls will return the same item
func (t *Tokens) Peek() Item {
if t.peekCount > 0 {
return t.token[t.peekCount-1]
}
t.peekCount = 1
t.token[0] = t.lexer.nextItem()
return t.token[0]
}

// Consume is a convenience method that consumes the next cnt tokens,
// but backs up and stops early if it hits an error or EOF.
func (t *Tokens) Consume(cnt int) {
for i := 0; i < cnt; i++ {
token := t.Next()
if token.typ == tError || token.typ == tEOF {
t.Backup()
break
}
}
}

// LineNumber returns the current line number in the lexed input.
// Used for logging.
func (t *Tokens) LineNumber() int {
return t.lexer.lineNum()
}
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func BenchmarkShortcodeLexer(b *testing.B) {
}

func collect(t *shortCodeLexerTest) (items []Item) {
l := newShortcodeLexer(t.name, t.input, 0)
l := newPageLexer(t.name, t.input, 0)
for {
item := l.nextItem()
items = append(items, item)
Expand Down

0 comments on commit 420198d

Please sign in to comment.