diff --git a/hugolib/page.go b/hugolib/page.go
index e867dd52560..7c3cfccb02f 100644
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -141,6 +141,7 @@ type Page struct {
 	contentv        template.HTML
 	summary         template.HTML
 	TableOfContents template.HTML
+
 	// Passed to the shortcodes
 	pageWithoutContent *PageWithoutContent
 
@@ -161,7 +162,6 @@ type Page struct {
 
 	extension   string
 	contentType string
-	renderable  bool
 
 	Layout string
 
@@ -171,13 +171,8 @@ type Page struct {
 
 	linkTitle string
 
-	frontmatter []byte
-
-	// rawContent is the raw content read from the content file.
-	rawContent []byte
-
-	// workContent is a copy of rawContent that may be mutated during site build.
-	workContent []byte
+	// Content items.
+	pageContent
 
 	// whether the content is in a CJK language.
 	isCJKLanguage bool
@@ -1756,39 +1751,6 @@ func (p *Page) shouldRenderTo(f output.Format) bool {
 	return found
 }
 
-func (p *Page) parse(reader io.Reader) error {
-	psr, err := parser.ReadFrom(reader)
-
-	if err != nil {
-		return err
-	}
-
-	p.renderable = psr.IsRenderable()
-	p.frontmatter = psr.FrontMatter()
-	p.rawContent = psr.Content()
-	p.lang = p.Source.File.Lang()
-
-	meta, err := psr.Metadata()
-	if err != nil {
-		return _errors.Wrap(err, "error in front matter")
-	}
-	if meta == nil {
-		// missing frontmatter equivalent to empty frontmatter
-		meta = map[string]interface{}{}
-	}
-
-	if p.s != nil && p.s.owner != nil {
-		gi, enabled := p.s.owner.gitInfo.forPage(p)
-		if gi != nil {
-			p.GitInfo = gi
-		} else if enabled {
-			p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
-		}
-	}
-
-	return p.update(meta)
-}
-
 func (p *Page) RawContent() string {
 	return string(p.rawContent)
 }
@@ -1871,7 +1833,7 @@ func (p *Page) SaveSource() error {
 // TODO(bep) lazy consolidate
 func (p *Page) processShortcodes() error {
 	p.shortcodeState = newShortcodeHandler(p)
-	tmpContent, err := p.shortcodeState.extractShortcodes(p.workContent, p.withoutContent())
+	tmpContent, err := p.shortcodeState.extractShortcodes(p.parsed.Tokens(), p.withoutContent())
 	if err != nil {
 		return err
 	}
diff --git a/hugolib/page_content.go b/hugolib/page_content.go
new file mode 100644
index 00000000000..02b34c773e4
--- /dev/null
+++ b/hugolib/page_content.go
@@ -0,0 +1,83 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+	"io"
+
+	"github.com/gohugoio/hugo/parser"
+	"github.com/gohugoio/hugo/parser/pageparser"
+	"github.com/pkg/errors"
+)
+
+// The content related items on a Page.
+type pageContent struct {
+	renderable bool
+
+	frontmatter []byte
+
+	// rawContent is the raw content read from the content file.
+	rawContent []byte
+
+	// workContent is a copy of rawContent that may be mutated during site build.
+	workContent []byte
+
+	// The AST of the parsed page. Contains information about:
+	// shortcodes, front matter, summary indicators.
+	parsed pageparser.Result
+}
+
+func (p *Page) parse(reader io.Reader) error {
+	// TODO(bep) 2errors consolidate when done
+	rs := reader.(io.ReadSeeker)
+
+	psr, err := parser.ReadFrom(rs)
+	if err != nil {
+		return err
+	}
+
+	rs.Seek(0, 0)
+
+	psr2, err := pageparser.ReadFrom(rs)
+	if err != nil {
+		return err
+	}
+
+	p.parsed = psr2
+
+	p.renderable = psr.IsRenderable()
+	p.frontmatter = psr.FrontMatter()
+	p.rawContent = psr.Content()
+	p.lang = p.Source.File.Lang()
+
+	meta, err := psr.Metadata()
+	if err != nil {
+		return errors.Wrap(err, "error in front matter")
+	}
+	if meta == nil {
+		// missing frontmatter equivalent to empty frontmatter
+		meta = map[string]interface{}{}
+	}
+
+	if p.s != nil && p.s.owner != nil {
+		gi, enabled := p.s.owner.gitInfo.forPage(p)
+		if gi != nil {
+			p.GitInfo = gi
+		} else if enabled {
+			p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
+		}
+	}
+
+	return p.update(meta)
+}
diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go
index a21a10ad242..8fad595c1ca 100644
--- a/hugolib/shortcode.go
+++ b/hugolib/shortcode.go
@@ -615,19 +615,7 @@ Loop:
 
 var shortCodeStart = []byte("{{")
 
-func (s *shortcodeHandler) extractShortcodes(input []byte, p *PageWithoutContent) (string, error) {
-
-	startIdx := bytes.Index(input, shortCodeStart)
-
-	// short cut for docs with no shortcodes
-	if startIdx < 0 {
-		return string(input), nil
-	}
-
-	// the parser takes a string;
-	// since this is an internal API, it could make sense to use the mutable []byte all the way, but
-	// it seems that the time isn't really spent in the byte copy operations, and the impl. gets a lot cleaner
-	pt := pageparser.ParseFrom(input, startIdx)
+func (s *shortcodeHandler) extractShortcodes(pt *pageparser.Tokens, p *PageWithoutContent) (string, error) {
 
 	result := bp.GetBuffer()
 	defer bp.PutBuffer(result)
diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go
index f8837810c91..5f9e2e3454a 100644
--- a/hugolib/shortcode_test.go
+++ b/hugolib/shortcode_test.go
@@ -365,11 +365,11 @@ func TestExtractShortcodes(t *testing.T) {
 		expectErrorMsg string
 	}{
 		{"text", "Some text.", "map[]", "Some text.", ""},
-		{"invalid right delim", "{{< tag }}", "", false, ":4:.*unrecognized character.*}"},
-		{"invalid close", "\n{{< /tag >}}", "", false, ":5:.*got closing shortcode, but none is open"},
-		{"invalid close2", "\n\n{{< tag >}}{{< /anotherTag >}}", "", false, ":6: closing tag for shortcode 'anotherTag' does not match start tag"},
-		{"unterminated quote 1", `{{< figure src="im caption="S" >}}`, "", false, ":4:.got pos.*"},
-		{"unterminated quote 1", `{{< figure src="im" caption="S >}}`, "", false, ":4:.*unterm.*}"},
+		{"invalid right delim", "{{< tag }}", "", false, ":8:.*unrecognized character.*}"},
+		{"invalid close", "\n{{< /tag >}}", "", false, ":9:.*got closing shortcode, but none is open"},
+		{"invalid close2", "\n\n{{< tag >}}{{< /anotherTag >}}", "", false, ":10: closing tag for shortcode 'anotherTag' does not match start tag"},
+		{"unterminated quote 1", `{{< figure src="im caption="S" >}}`, "", false, ":8:.got pos.*"},
+		{"unterminated quote 1", `{{< figure src="im" caption="S >}}`, "", false, ":8:.*unterm.*}"},
 		{"one shortcode, no markup", "{{< tag >}}", "", testScPlaceholderRegexp, ""},
 		{"one shortcode, markup", "{{% tag %}}", "", testScPlaceholderRegexp, ""},
 		{"one pos param", "{{% tag param1 %}}", `tag([\"param1\"], true){[]}"]`, testScPlaceholderRegexp, ""},
@@ -405,7 +405,7 @@ func TestExtractShortcodes(t *testing.T) {
 			fmt.Sprintf("Hello %sworld%s. And that's it.", testScPlaceholderRegexp, testScPlaceholderRegexp), ""},
 	} {
 
-		p, _ := pageFromString(simplePage, "simple.md", func(templ tpl.TemplateHandler) error {
+		p, _ := pageFromString(simplePage+this.input, "simple.md", func(templ tpl.TemplateHandler) error {
 			templ.AddTemplate("_internal/shortcodes/tag.html", `tag`)
 			templ.AddTemplate("_internal/shortcodes/sc1.html", `sc1`)
 			templ.AddTemplate("_internal/shortcodes/sc2.html", `sc2`)
@@ -424,7 +424,7 @@ func TestExtractShortcodes(t *testing.T) {
 			return fmt.Sprintf("HAHA%s-%dHBHB", shortcodePlaceholderPrefix, counter)
 		}
 
-		content, err := s.extractShortcodes([]byte(this.input), p.withoutContent())
+		content, err := s.extractShortcodes(p.parsed.Tokens(), p.withoutContent())
 
 		if b, ok := this.expect.(bool); ok && !b {
 			if err == nil {
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
index 6e93bb696d4..dc4d9f67e84 100644
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -21,6 +21,8 @@ type Item struct {
 	Val []byte
 }
 
+type Items []Item
+
 func (i Item) ValStr() string {
 	return string(i.Val)
 }
diff --git a/parser/pageparser/page_tokens_getters.go b/parser/pageparser/page_tokens_getters.go
new file mode 100644
index 00000000000..a08225f3894
--- /dev/null
+++ b/parser/pageparser/page_tokens_getters.go
@@ -0,0 +1,19 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pageparser
+
+// GetFrontMatter returns the unmarshalled frontmatter data.
+func GetFrontMatter(items Items) (map[string]interface{}, error) {
+	return nil, nil
+}
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index c15e977ca31..90cf3ddfc0d 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -50,14 +50,23 @@ type pageLexer struct {
 	pos   pos // input position
 	start pos // item start position
 	width pos // width of last element
-	lastPos pos // position of the last item returned by nextItem
 
 	contentSections int
 
 	lexerShortcodeState
 
 	// items delivered to client
-	items []Item
+	items Items
+}
+
+// Implement the Result interface
+func (l *pageLexer) Tokens() *Tokens {
+	return &Tokens{lexer: l, items: l.items}
+}
+
+func (l *pageLexer) Items() Items {
+	return l.items
+
 }
 
 // note: the input position here is normally 0 (start), but
@@ -79,6 +88,10 @@ func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLe
 	return lexer
 }
 
+func (l *pageLexer) newTokens() *Tokens {
+	return &Tokens{lexer: l, items: l.items}
+}
+
 // main loop
 func (l *pageLexer) run() *pageLexer {
 	for l.state = l.stateStart; l.state != nil; {
@@ -160,25 +173,12 @@ func (l *pageLexer) ignore() {
 
 var lf = []byte("\n")
 
-// nice to have in error logs
-func (l *pageLexer) lineNum() int {
-	return bytes.Count(l.input[:l.lastPos], lf) + 1
-}
-
 // nil terminates the parser
 func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
 	l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...))})
 	return nil
}
 
-// consumes and returns the next item
-func (l *pageLexer) nextItem() Item {
-	item := l.items[0]
-	l.items = l.items[1:]
-	l.lastPos = item.pos
-	return item
-}
-
 func (l *pageLexer) consumeCRLF() bool {
 	var consumed bool
 	for _, r := range crLf {
@@ -258,15 +258,16 @@ LOOP:
 		case r == '#':
 			return lexFrontMatterOrgMode
 		case !isSpace(r) && !isEndOfLine(r):
+			// No front matter.
 			if r == '<' {
 				l.emit(tHTMLLead)
 				// Not need to look further. Hugo treats this as plain HTML,
 				// no front matter, no shortcodes, no nothing.
 				l.pos = pos(len(l.input))
 				l.emit(tText)
-				break LOOP
+
 			}
-			return l.errorf("failed to detect front matter type; got unknown identifier %q", r)
+			break LOOP
 		}
 	}
 
@@ -366,18 +367,19 @@ LOOP:
 
 }
 
+func (l *pageLexer) printCurrentInput() {
+	fmt.Printf("input[%d:]: %q", l.pos, string(l.input[l.pos:]))
+}
+
 // Handle YAML or TOML front matter.
 func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc {
+
 	for i := 0; i < 2; i++ {
 		if r := l.next(); r != delimr {
 			return l.errorf("invalid %s delimiter", name)
 		}
 	}
 
-	if !l.consumeCRLF() {
-		return l.errorf("invalid %s delimiter", name)
-	}
-
 	// We don't care about the delimiters.
 	l.ignore()
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
index 948c05edf28..cc5c2ddf3c3 100644
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -17,27 +17,69 @@
 // See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
 package pageparser
 
-func Parse(input []byte) *Tokens {
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+
+	"github.com/pkg/errors"
+)
+
+type Result interface {
+	Tokens() *Tokens
+	Items() Items
+}
+
+var _ Result = (*pageLexer)(nil)
+
+func Parse(input []byte) Result {
 	return ParseFrom(input, 0)
 }
 
-func ParseFrom(input []byte, from int) *Tokens {
+// TODO(bep) consolidate (remove superfluous) and have one or two Parse* methods.
+func ReadFrom(r io.Reader) (Result, error) {
+	b, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read page content")
+	}
+	lexer := newPageLexer(b, 0, lexIntroSection)
+	lexer.run()
+	return lexer, nil
+
+}
+
+func ParseFrom(input []byte, from int) Result {
 	lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
 	lexer.run()
-	return &Tokens{lexer: lexer}
+	return lexer
 }
 
 type Tokens struct {
-	lexer *pageLexer
+	lexer   *pageLexer
+	items   Items
+	lastPos pos // position of the last item returned by nextItem
+
 	token     [3]Item // 3-item look-ahead is what we currently need
 	peekCount int
 }
 
+// consumes and returns the next item
+func (t *Tokens) nextItem() Item {
+	item := t.items[0]
+	t.items = t.items[1:]
+	t.lastPos = item.pos
+	return item
+}
+
+func (t *Tokens) Content() []byte {
+	return t.lexer.input
+}
+
 func (t *Tokens) Next() Item {
 	if t.peekCount > 0 {
 		t.peekCount--
 	} else {
-		t.token[0] = t.lexer.nextItem()
+		t.token[0] = t.nextItem()
 	}
 	return t.token[t.peekCount]
 }
@@ -73,7 +115,7 @@ func (t *Tokens) Peek() Item {
 		return t.token[t.peekCount-1]
 	}
 	t.peekCount = 1
-	t.token[0] = t.lexer.nextItem()
+	t.token[0] = t.nextItem()
 	return t.token[0]
 }
 
@@ -91,5 +133,5 @@ func (t *Tokens) Consume(cnt int) {
 
 // LineNumber returns the current line number. Used for logging.
 func (t *Tokens) LineNumber() int {
-	return t.lexer.lineNum()
+	return bytes.Count(t.lexer.input[:t.lastPos], lf) + 1
 }
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index 19e30dc9adb..fc4197682d6 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -33,9 +33,9 @@ func nti(tp itemType, val string) Item {
 var (
 	tstJSON                = `{ "a": { "b": "\"Hugo\"}" } }`
 	tstHTMLLead            = nti(tHTMLLead, " <")
-	tstFrontMatterTOML     = nti(tFrontMatterTOML, "foo = \"bar\"\n")
-	tstFrontMatterYAML     = nti(tFrontMatterYAML, "foo: \"bar\"\n")
-	tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "foo: \"bar\"\r\n")
+	tstFrontMatterTOML     = nti(tFrontMatterTOML, "\nfoo = \"bar\"\n")
+	tstFrontMatterYAML     = nti(tFrontMatterYAML, "\nfoo: \"bar\"\n")
+	tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "\r\nfoo: \"bar\"\r\n")
 	tstFrontMatterJSON     = nti(tFrontMatterJSON, tstJSON+"\r\n")
 	tstSomeText            = nti(tText, "\nSome text.\n")
 	tstSummaryDivider      = nti(tSummaryDivider, "<!--more-->")
@@ -55,7 +55,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 var frontMatterTests = []lexerTest{
 	{"empty", "", []Item{tstEOF}},
 	{"HTML Document", ` <html> `, []Item{tstHTMLLead, nti(tText, "html> "), tstEOF}},
+	{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
+	{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(tFrontMatterYAML, "\n"), tstSomeText, tstEOF}},
 	// Note that we keep all bytes as they are, but we need to handle CRLF
 	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
 	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
@@ -80,9 +82,10 @@ func TestFrontMatter(t *testing.T) {
 func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
 	l := newPageLexer(input, 0, stateStart)
 	l.run()
+	t := l.newTokens()
 
 	for {
-		item := t.nextItem()
+		item := t.nextItem()
 		items = append(items, item)
 		if item.typ == tEOF || item.typ == tError {
 			break
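
For review purposes, here is a minimal sketch of how the pieces above fit together end to end: pageparser.ReadFrom lexes a whole page (front matter plus content) into a Result, and the Result's Tokens() stream is drained with Next() and LineNumber(), the same way the collect() test helper above does. This is not part of the change itself: dumpPage is a hypothetical helper, and because Item's typ field and the tEOF/tError item types are unexported, the sketch assumes it lives inside package pageparser (for example in a test file).

package pageparser

import (
	"bytes"
	"fmt"
	"log"
)

// dumpPage lexes src via the new ReadFrom entry point, then drains the
// resulting Tokens stream, printing each item's line number and value.
// It mirrors the collect() helper in pageparser_intro_test.go.
func dumpPage(src []byte) {
	res, err := ReadFrom(bytes.NewReader(src))
	if err != nil {
		log.Fatal(err)
	}

	t := res.Tokens()
	for {
		item := t.Next()
		fmt.Printf("%d:\t%q\n", t.LineNumber(), item.ValStr())
		// tEOF and tError both terminate the stream.
		if item.typ == tEOF || item.typ == tError {
			break
		}
	}
}

Calling dumpPage([]byte("---\nfoo: \"bar\"\n---\n\nSome text.\n")) would print the front matter item followed by the text and EOF items. Note that LineNumber() is now computed from Tokens.lastPos rather than from lexer state (the lexer's lineNum() and nextItem() were removed above), so it only advances as items are consumed from the Tokens stream.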