Skip to content

Commit

Permalink
parser/pageparser: Add front matter etc. support
Browse files Browse the repository at this point in the history
  • Loading branch information
bep committed Oct 17, 2018
1 parent 2d54c5f commit ee0ce98
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 32 deletions.
6 changes: 4 additions & 2 deletions parser/pageparser/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ const (
tError itemType = iota
tEOF

// page items
tHTMLLead // <

// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
Expand All @@ -95,8 +98,7 @@ const (
tScParam
tScParamVal

//itemIdentifier
tText // plain text, used for everything outside the shortcodes
tText // plain text

// preserved for later - keywords come after this
tKeywordMarker
Expand Down
68 changes: 44 additions & 24 deletions parser/pageparser/pagelexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,18 @@ type lexerShortcodeState struct {
}

type pageLexer struct {
name string
input string
state stateFunc
pos pos // input position
start pos // item start position
width pos // width of last element
lastPos pos // position of the last item returned by nextItem

// Set once front matter is read OK.
frontMatterRead bool
// Set when we see a non-whitespace character
noneWhiteSpaceSeen bool

lexerShortcodeState

// items delivered to client
Expand All @@ -63,16 +67,15 @@ func Parse(s string) *Tokens {
}

func ParseFrom(s string, from int) *Tokens {
lexer := newPageLexer("default", s, pos(from))
lexer := newPageLexer(s, pos(from))
lexer.run()
return &Tokens{lexer: lexer}
}

// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
func newPageLexer(name, input string, inputPosition pos) *pageLexer {
func newPageLexer(input string, inputPosition pos) *pageLexer {
lexer := &pageLexer{
name: name,
input: input,
pos: inputPosition,
lexerShortcodeState: lexerShortcodeState{
Expand All @@ -88,7 +91,7 @@ func newPageLexer(name, input string, inputPosition pos) *pageLexer {

// main loop
func (l *pageLexer) run() *pageLexer {
for l.state = lexTextOutsideShortcodes; l.state != nil; {
for l.state = lexMain; l.state != nil; {
l.state = l.state(l)
}
return l
Expand Down Expand Up @@ -178,28 +181,45 @@ func (l *pageLexer) nextItem() Item {
return item
}

// scans until an opening shortcode opening bracket.
// if no shortcodes, it will keep on scanning until EOF
func lexTextOutsideShortcodes(l *pageLexer) stateFunc {
func lexMain(l *pageLexer) stateFunc {
LOOP:
for {
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {
if l.pos > l.start {
l.emit(tText)
}
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
l.currLeftDelimItem = tLeftDelimScWithMarkup
l.currRightDelimItem = tRightDelimScWithMarkup
} else {
l.currLeftDelimItem = tLeftDelimScNoMarkup
l.currRightDelimItem = tRightDelimScNoMarkup
// TODO(bep) 2errors split these in 2 somehow
if l.frontMatterRead {
l.noneWhiteSpaceSeen = true
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {
if l.pos > l.start {
l.emit(tText)
}
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
l.currLeftDelimItem = tLeftDelimScWithMarkup
l.currRightDelimItem = tRightDelimScWithMarkup
} else {
l.currLeftDelimItem = tLeftDelimScNoMarkup
l.currRightDelimItem = tRightDelimScNoMarkup
}
return lexShortcodeLeftDelim
}
return lexShortcodeLeftDelim

}
if l.next() == eof {
break

r := l.next()
switch {
case r == eof:
break LOOP
case r == '<':
if !l.noneWhiteSpaceSeen {
l.emit(tHTMLLead)
// No need to look further.
l.pos = pos(len(l.input))
l.emit(tText)
break LOOP
}
case !isSpace(r):
l.noneWhiteSpaceSeen = true
}

}

// Done!
if l.pos > l.start {
l.emit(tText)
Expand Down Expand Up @@ -234,14 +254,14 @@ func lexShortcodeComment(l *pageLexer) stateFunc {
l.ignore()
l.pos += pos(len(l.currentRightShortcodeDelim()))
l.emit(tText)
return lexTextOutsideShortcodes
return lexMain
}

func lexShortcodeRightDelim(l *pageLexer) stateFunc {
l.closingState = 0
l.pos += pos(len(l.currentRightShortcodeDelim()))
l.emit(l.currentRightShortcodeDelimItem())
return lexTextOutsideShortcodes
return lexMain
}

// either:
Expand Down
34 changes: 28 additions & 6 deletions parser/pageparser/pageparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"testing"
)

type shortCodeLexerTest struct {
type lexerTest struct {
name string
input string
items []Item
Expand All @@ -39,7 +39,7 @@ var (
tstVal = Item{tScParamVal, 0, "Hello World"}
)

var shortCodeLexerTests = []shortCodeLexerTest{
var shortCodeLexerTests = []lexerTest{
{"empty", "", []Item{tstEOF}},
{"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
{"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
Expand Down Expand Up @@ -159,7 +159,7 @@ var shortCodeLexerTests = []shortCodeLexerTest{
func TestShortcodeLexer(t *testing.T) {
t.Parallel()
for i, test := range shortCodeLexerTests {
items := collect(&test)
items := collect(test.name, test.input, true)
if !equal(items, test.items) {
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
}
Expand All @@ -170,16 +170,38 @@ func BenchmarkShortcodeLexer(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, test := range shortCodeLexerTests {
items := collect(&test)
items := collect(test.name, test.input, true)
if !equal(items, test.items) {
b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
}
}
}
}

func collect(t *shortCodeLexerTest) (items []Item) {
l := newPageLexer(t.name, t.input, 0).run()
var (
tstHTMLLead = Item{tHTMLLead, 0, " <"}
)

// frontMatterTests is the case table for TestFrontMatter: each entry is a
// case name, the raw input, and the items the lexer is expected to emit.
var frontMatterTests = []lexerTest{
// Empty input is expected to yield only the EOF item.
{"empty", "", []Item{tstEOF}},
// Leading whitespace followed by '<' is expected as a single tHTMLLead item
// (" <"), with the rest of the input delivered as one tText item.
{"HTML Document", ` <html>`, []Item{tstHTMLLead, Item{tText, 0, "html>"}, tstEOF}},
}

// TestFrontMatter lexes every input in frontMatterTests and reports an error
// for each case whose collected items differ from the expected items.
func TestFrontMatter(t *testing.T) {
t.Parallel()
for i, test := range frontMatterTests {
// The final argument is false so that collect leaves the lexer's
// frontMatterRead flag unset for these cases.
items := collect(test.name, test.input, false)
if !equal(items, test.items) {
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
}
}
}

func collect(name, input string, skipFrontMatter bool) (items []Item) {
l := newPageLexer(input, 0)
l.frontMatterRead = skipFrontMatter
l.run()

for {
item := l.nextItem()
items = append(items, item)
Expand Down

0 comments on commit ee0ce98

Please sign in to comment.