Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Markdown parser (#11) #140

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/coradoc/parser.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require_relative "parser/base"
require_relative "parser/markdown"

module Coradoc
module Parser
Expand Down
159 changes: 159 additions & 0 deletions lib/coradoc/parser/markdown.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
require "parslet"
require "parslet/convenience"

module Coradoc
module Parser
# Refinements and one extra atom that the Markdown grammar layers on top of
# Parslet. Nothing here takes effect until a file activates it with
# `using ParsletExtras`.
module ParsletExtras
  refine Parslet::Scope do
    # Reader for the scope's @current instance variable.
    attr_reader :current

    # Forwards the key check, with all arguments, to the object in @current.
    def has_key?(...)
      @current.has_key?(...)
    end
  end

  refine Parslet::Scope::Binding do
    # Forwards the key check, with all arguments, to this binding's @hash.
    def has_key?(...)
      @hash.has_key?(...)
    end

    # Gives a copied binding its own clone of @hash instead of sharing the
    # original's hash object.
    def initialize_copy(original)
      super
      @hash = @hash.clone
    end
  end

  # Atom that wraps another parslet and, when the wrapped parslet matches,
  # discards its result and yields a fixed value instead (similar in spirit to
  # Parslet's Named atom, but with an arbitrary result).
  # NOTE: Parslet only accepts hashes and arrays
  class Output < Parslet::Atoms::Base
    attr_reader :parslet, :value

    # parslet - the wrapped atom that decides whether there is a match
    # value   - the result reported on a successful match
    def initialize(parslet, value)
      super()

      @parslet = parslet
      @value = value
    end

    # Runs the wrapped parslet. A failure is passed through unchanged; a
    # success is replaced by a success carrying the configured value.
    def apply(source, context, consume_all)
      outcome = parslet.apply(source, context, consume_all)
      matched, _payload = outcome

      matched ? succ(value) : outcome
    end

    # Textual form used when the grammar is printed: "<value>:<wrapped parslet>".
    def to_s_inner(prec)
      format("%s:%s", value, parslet.to_s(prec))
    end
  end

  refine Parslet::Atoms::DSL do
    # Wraps the receiver in an Output atom so that a match yields `value`.
    def output(value)
      Output.new(self, value)
    end
  end
end

# Parslet grammar for the block-level structure of Markdown documents.
#
# The grammar recognises blank lines, ATX headings, thematic breaks, indented
# code blocks, block quotes and paragraphs. Nested blocks are tracked through
# Parslet scopes: every block opened with #open_block stores, under the :cont
# capture, the parser that has to match at the start of each further line of
# that block (see #continuation).
class Markdown < Parslet::Parser
  using ParsletExtras

  # One line terminator: LF, CRLF or a lone CR.
  def line_ending
    str("\n") | str("\r\n") | str("\r")
  end

  # A line terminator or, as a zero-width match, the end of the input. Used by
  # single-line blocks so that the last line of a document does not need a
  # trailing newline (the same allowance `line` and `blank_line` already make).
  def line_end
    line_ending | any.absent?
  end

  # Indentation that opens and continues an indented code block: exactly four
  # spaces. Written as a repetition so the width is explicit in the source.
  def code_indent
    str(" ").repeat(4, 4)
  end

  # Debugging aid: an atom that prints `msg`, the current source position and
  # the current captures, and then matches without consuming any input.
  def debug(msg)
    dynamic do |src, ctx|
      puts "#{msg} @ #{src.line_and_column}:"
      pp ctx.captures
      # always succeeds and consumes nothing, at end of input or not
      any.present? | any.absent?
    end
  end

  # Up to three leading spaces.
  rule(:non_indent_space) { str(" ").repeat(0, 3) }

  rule(:whitespace) { match[" \t"] }
  # Either at least one whitespace character running up to the end of the input,
  # or any amount of whitespace followed by a line terminator.
  rule(:blank_line) { whitespace.repeat(1) >> any.absent? | whitespace.repeat >> line_ending }
  rule(:line_char) { match["^\r\n"] }
  # A line captured as :ln, without its terminator. The last line of the input
  # may lack a terminator but must then be non-empty.
  rule(:line) { line_char.repeat(1).as(:ln) >> any.absent? | line_char.repeat.as(:ln) >> line_ending }

  # Parser that must match at the start of every further line of the innermost
  # open block; it is read from the :cont capture at parse time.
  # MUST NOT be a `rule`, otherwise gets cached in a failure state and prevents nested alternatives from working
  def continuation
    dynamic do |src, ctx|
      # puts "parsing continuation at #{src.line_and_column} (#{src.bytepos}) with #{ctx.captures[:cont]}"
      ctx.captures[:cont]
    end
  end

  # Records `cont_rule` as the continuation parser of the block being opened.
  # When the parent scope already has a continuation, the new one is the
  # parent's continuation followed by `cont_rule`, so nested blocks require
  # every enclosing prefix on each line. Matches without consuming input.
  #
  # kind      - label of the block; only used by the commented-out trace below
  # cont_rule - parser for this block's own line prefix
  def open_block(kind, cont_rule)
    dynamic do |src, ctx|
      parent_scope = ctx.captures.current.parent
      ctx.captures[:cont] = cont_rule
      ctx.captures[:cont] = parent_scope[:cont] >> cont_rule if parent_scope.has_key?(:cont)
      # puts "starting block #{kind} at #{src.line_and_column} (#{src.bytepos}): #{ctx.captures[:cont]}"
      any.present? | any.absent?
    end
  end

  # Optional closing sequence of an ATX heading: at least one whitespace
  # character, any number of "#", trailing whitespace, then the end of the line
  # (the end of the input counts as the end of the line).
  rule(:atx_ending_seq) do
    whitespace.repeat(1) >> str("#").repeat >> whitespace.repeat >> line_end.present?
  end

  # ATX heading: one to six "#" captured as :heading, optionally followed by
  # spaces and the heading text captured as :text, an optional closing sequence
  # and the end of the line. A heading on the final, unterminated line of the
  # input is accepted as well.
  rule(:atx_heading) do
    non_indent_space >> str("#").repeat(1, 6).as(:heading) >> str("#").absent? >>
      (
        # first check to catch only one space (that would be consumed with the repeat(1)) until ending seq
        atx_ending_seq.absent? >> str(" ").repeat(1) >> (atx_ending_seq.absent? >> line_char).repeat(1).as(:text)
      ).maybe >> atx_ending_seq.maybe >> line_end
  end

  # Three or more occurrences of the character `c`, each optionally followed by
  # whitespace.
  def thematic_break_char(c)
    (str(c) >> whitespace.repeat).repeat(3)
  end

  # Thematic break made of "-", "_" or "*"; yields {hr: true}. The break may sit
  # on the final, unterminated line of the input.
  rule(:thematic_break) do
    non_indent_space >> (thematic_break_char("-") | thematic_break_char("_") | thematic_break_char("*")).output(hr: true) >> line_end
  end

  # Indented code block: a line indented by four spaces followed by further
  # lines carrying the block's continuation prefix; captured as :code_block.
  # TODO: actually verbatim, not paragraph lines
  rule(:indented_code_block) do
    code_indent >> scope do
      open_block(:indented_code, code_indent) >> (line >> (continuation >> line).repeat).as(:code_block)
    end
  end

  # ">" with up to three leading spaces and one optional following space.
  rule(:block_quote_marker) do
    non_indent_space >> str(">") >> str(" ").maybe
  end

  # Block quote: a marker followed by one or more blocks, each further block
  # introduced by the continuation prefix; captured as :block_quote.
  rule(:block_quote) do
    block_quote_marker >> scope do
      open_block(:block_quote, block_quote_marker) >> (block >> (continuation >> block).repeat).as(:block_quote)
    end
  end

  # Constructs that end a paragraph when they start a line.
  rule(:paragraph_interrupt) do
    blank_line | atx_heading | thematic_break | block_quote
  end

  # Paragraph: consecutive lines, none of which starts a paragraph interrupt;
  # captured as :p.
  rule(:paragraph) do
    paragraph_interrupt.absent? >> scope do
      open_block(:paragraph, paragraph_interrupt.absent?) >> non_indent_space >> (line >> (continuation >> line).repeat).as(:p)
    end
  end

  # Any block. Alternatives are tried in this order, so paragraph is the fallback.
  rule(:block) do
    blank_line | atx_heading | thematic_break | indented_code_block | block_quote | paragraph
  end

  root :document
  # A document is a sequence of zero or more blocks.
  rule(:document) do
    block.repeat
  end

  # Reads the file at `filename` and parses its content with a new parser.
  #
  # Returns the parse result. When parsing fails, the failure cause is printed
  # to standard output as an ASCII tree and the method returns nil (the return
  # value of `puts`); the Parslet::ParseFailed error is not re-raised.
  def self.parse(filename)
    content = File.read(filename)
    new.parse(content)
  rescue Parslet::ParseFailed => e
    puts e.parse_failure_cause.ascii_tree
  end
end
end
end
177 changes: 177 additions & 0 deletions spec/coradoc/parser/markdown/atx_headings_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
require "spec_helper"

# Specs for ATX heading handling in the Markdown block parser.
#
# Each example feeds a %q(...) literal to the parser through parse_with_debug
# and compares the resulting tree with the expected array of block hashes.
# The example numbers presumably refer to the numbered examples of the
# CommonMark specification - TODO confirm which spec version.
#
# NOTE(review): whitespace inside the %q(...) literals and inside the expected
# strings is significant (indentation, trailing spaces), so editors and
# formatters must leave it untouched.
RSpec.describe Coradoc::Parser::Markdown do
describe "ATX headings" do
# Heading levels one to six.
it "parses example 62" do
expect(subject.parse_with_debug(%q(
# foo
## foo
### foo
#### foo
##### foo
###### foo
))).to eq([
{heading: "#", text: "foo"},
{heading: "##", text: "foo"},
{heading: "###", text: "foo"},
{heading: "####", text: "foo"},
{heading: "#####", text: "foo"},
{heading: "######", text: "foo"},
])
end

# Seven "#" characters do not form a heading; the line stays a paragraph.
it "parses example 63" do
expect(subject.parse_with_debug(%q(
####### foo
))).to eq([
{p: {ln: "####### foo"}}
])
end

# A "#" that is not followed by a space does not start a heading.
it "parses example 64" do
expect(subject.parse_with_debug(%q(
#5 bolt

#hashtag
))).to eq([
{p: {ln: "#5 bolt"}},
{p: {ln: "#hashtag"}},
])
end

# Declared without a body, so these two are reported as pending.
pending "parses example 65"
pending "parses example 66"

it "parses example 67" do
expect(subject.parse_with_debug(%q(
# foo
))).to eq([
{heading: "#", text: "foo"},
])
end

it "parses example 68" do
expect(subject.parse_with_debug(%q(
### foo
## foo
# foo
))).to eq([
{heading: "###", text: "foo"},
{heading: "##", text: "foo"},
{heading: "#", text: "foo"},
])
end

# Expects an indented code block rather than a heading.
it "parses example 69" do
expect(subject.parse_with_debug(%q(
# foo
))).to eq([
{code_block: [{ln: "# foo"}, {ln: " "}]}
])
end

# Expects the second line to continue the paragraph instead of opening a heading.
it "parses example 70" do
expect(subject.parse_with_debug(%q(
foo
# bar
))).to eq([
{p: [{ln: "foo"}, {ln: " # bar"}]}
])
end

# Closing "#" sequences are not part of the heading text.
it "parses example 71" do
expect(subject.parse_with_debug(%q(
## foo ##
### bar ###
))).to eq([
{heading: "##", text: "foo"},
{heading: "###", text: "bar"},
])
end

# The closing sequence does not have to match the opening sequence in length.
it "parses example 72" do
expect(subject.parse_with_debug(%q(
# foo ##################################
##### foo ##
))).to eq([
{heading: "#", text: "foo"},
{heading: "#####", text: "foo"},
])
end

it "parses example 73" do
expect(subject.parse_with_debug(%q(
### foo ###
))).to eq([
{heading: "###", text: "foo"},
])
end

# A "#" run followed by other text is ordinary heading text, not a closing sequence.
it "parses example 74" do
expect(subject.parse_with_debug(%q(
### foo ### b
))).to eq([
{heading: "###", text: "foo ### b"},
])
end

# A closing sequence must be preceded by whitespace.
it "parses example 75" do
expect(subject.parse_with_debug(%q(
# foo#
))).to eq([
{heading: "#", text: "foo#"},
])
end

# Marked pending from inside the example: backslash escapes are not handled
# yet, so the expectation below is expected to fail for now.
it "parses example 76" do
pending "escapes"
expect(subject.parse_with_debug(%q(
### foo \###
## foo #\##
# foo \#
))).to eq([
{heading: "###", text: "foo ###"},
{heading: "##", text: "foo ###"},
{heading: "#", text: "foo #"},
])
end

# Headings need no blank lines around them; here one sits between two thematic breaks.
it "parses example 77" do
expect(subject.parse_with_debug(%q(
****
## foo
****
))).to eq([
{hr: true},
{heading: "##", text: "foo"},
{hr: true},
])
end

# A heading interrupts a paragraph.
it "parses example 78" do
expect(subject.parse_with_debug(%q(
Foo bar
# baz
Bar foo
))).to eq([
{p: {ln: "Foo bar"}},
{heading: "#", text: "baz"},
{p: {ln: "Bar foo"}},
])
end

# Headings may be empty; no :text key is expected in that case.
it "parses example 79" do
expect(subject.parse_with_debug(%q(
##
#
### ###
))).to eq([
{heading: "##"},
{heading: "#"},
{heading: "###"},
])
end

end
end

Loading
Loading