Skip to content

Commit

Permalink
Merge pull request #1656 from kmuto/review-header-listener
Browse files Browse the repository at this point in the history
Extract EPUBMaker#parse_headlines to add a test
  • Loading branch information
takahashim authored Jan 27, 2021
2 parents 725d0d4 + 164c3e5 commit e248f10
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 5 deletions.
15 changes: 11 additions & 4 deletions lib/review/epubmaker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -422,12 +422,19 @@ def detect_properties(path)
properties
end

def write_info_body(basetmpdir, _id, filename, ispart = nil, chaptype = nil)
# Scan the HTML file at +path+ and collect its heading information.
#
# The file is streamed through REXML with a ReVIEWHeaderListener, which
# appends one entry per heading to the array it was constructed with.
#
# @param path [String] path of the HTML file to scan
# @return [Array] the entries accumulated by the listener (empty when the
#   listener recorded nothing)
def parse_headlines(path)
  [].tap do |found|
    # The block form of File.open closes the handle even if parsing raises.
    File.open(path) do |source|
      listener = ReVIEWHeaderListener.new(found)
      REXML::Document.parse_stream(source, listener)
    end
  end
end

def write_info_body(basetmpdir, _id, filename, ispart = nil, chaptype = nil)
path = File.join(basetmpdir, filename)
htmlio = File.new(path)
REXML::Document.parse_stream(htmlio, ReVIEWHeaderListener.new(headlines))
htmlio.close
headlines = parse_headlines(path)

if headlines.empty?
warn "#{filename} is discarded because there is no heading. Use `=[notoc]' or `=[nodisp]' to exclude headlines from the table of contents."
Expand Down
11 changes: 10 additions & 1 deletion lib/review/epubmaker/reviewheaderlistener.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
#
module ReVIEW
class EPUBMaker
# Listener class to scan HTML and get heading information
#
# The heading information this listener will retrieve is as follows:
#
# * level: Heading level (1..6)
# * id: HTML ID attribute. Normally the `id` attribute of the h(1-6) element, but if there is an `a` element with an `id` within the h(1-6) element, that `id` is used instead.
# * title: The title string of the headline. Usually, it is the text within the h(1-6) element; if there is an `img` element with an `alt` attribute, the `alt` text is included in the title.
# * notoc: The `notoc` attribute of the headline element.
#
class ReVIEWHeaderListener
include REXML::StreamListener
def initialize(headlines)
Expand All @@ -22,7 +31,7 @@ def tag_start(name, attrs)
@level = $1.to_i
@id = attrs['id'] if attrs['id'].present?
@notoc = attrs['notoc'] if attrs['notoc'].present?
elsif !@level.nil?
elsif @level.present? # if in <hN> tag
if name == 'img' && attrs['alt'].present?
@content << attrs['alt']
elsif name == 'a' && attrs['id'].present?
Expand Down
35 changes: 35 additions & 0 deletions test/assets/header_listener.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xmlns:ops="http://www.idpf.org/2007/ops" xml:lang="ja">
<head>
<meta charset="UTF-8" />
<link rel="stylesheet" type="text/css" href="style.css" />
<meta name="generator" content="Re:VIEW" />
<title>first chapter</title>
</head>
<body>
<h1><a id="h1"></a><span class="secno">第1章 </span>first chapter</h1>

<h2><a id="h1-1"></a><span class="secno">1.1 </span>first section</h2>

<h3><a id="h1-1-1"></a>first <img src="images/icon1.jpg" alt="subsection" /></h3>

<h2><a id="h1-2"></a><span class="secno">1.2 </span>second section</h2>

<h3 id="dummy1"><a id="h1-2-1"></a>dummy subsection</h3>

<h2><a id="h1-3"></a><span class="secno">1.3 </span>third section</h2>

<h2 id="ch01_nonum1" notoc="true">notoc section</h2>

<h2 id="dummy2" notoc="true">notoc section</h2>

<a id="ch01_nonum3" /><h2 id="ch01_nonum3" hidden="true">nodisp section</h2>

<a id="dummy3" /><h2 id="dummy3" hidden="true">nodisp section</h2>

<h2 id="ch01_nonum5">nonum section</h2>

<h2 id="dummy4">nonum section</h2>
</body>
</html>
49 changes: 49 additions & 0 deletions test/test_reviewheaderlistener.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
require 'test_helper'
require 'rexml/document'
require 'rexml/streamlistener'
require 'review/epubmaker'

# Checks the heading information that ReVIEW::EPUBMaker#parse_headlines
# extracts from the header_listener.html fixture.
class ReVIEWHeaderListenerTest < Test::Unit::TestCase
  def setup
    @epubmaker = ReVIEW::EPUBMaker.new
  end

  # Parses the fixture and compares every extracted headline (id, level,
  # notoc, title) with the expected list, in document order.
  def test_epubmaker_parse_headlines
    # original Re:VIEW source:
    #
    #   = first chapter
    #   == first section
    #   === first @<embed>{<img src="images/icon1.jpg" alt="subsection" />}
    #   == second section
    #   ==={dummy1} dummy subsection
    #   == third section
    #   ==[notoc] notoc section
    #   ==[notoc]{dummy2} notoc section
    #   ==[nodisp] nodisp section
    #   ==[nodisp]{dummy3} nodisp section
    #   ==[nonum] nonum section
    #   ==[nonum]{dummy4} nonum section
    #
    # The fixture is only read, so no temporary directory is needed.
    path = File.join(assets_dir, 'header_listener.html')
    headlines = @epubmaker.parse_headlines(path)

    expected = [{ 'id' => 'h1', 'level' => 1, 'notoc' => nil, 'title' => '第1章 first chapter' },
                { 'id' => 'h1-1', 'level' => 2, 'notoc' => nil, 'title' => '1.1 first section' },
                { 'id' => 'h1-1-1', 'level' => 3, 'notoc' => nil, 'title' => 'first subsection' },
                { 'id' => 'h1-2', 'level' => 2, 'notoc' => nil, 'title' => '1.2 second section' },
                { 'id' => 'h1-2-1', 'level' => 3, 'notoc' => nil, 'title' => 'dummy subsection' },
                { 'id' => 'h1-3', 'level' => 2, 'notoc' => nil, 'title' => '1.3 third section' },
                { 'id' => 'ch01_nonum1', 'level' => 2, 'notoc' => 'true', 'title' => 'notoc section' },
                { 'id' => 'dummy2', 'level' => 2, 'notoc' => 'true', 'title' => 'notoc section' },
                { 'id' => 'ch01_nonum3', 'level' => 2, 'notoc' => nil, 'title' => 'nodisp section' },
                { 'id' => 'dummy3', 'level' => 2, 'notoc' => nil, 'title' => 'nodisp section' },
                { 'id' => 'ch01_nonum5', 'level' => 2, 'notoc' => nil, 'title' => 'nonum section' },
                { 'id' => 'dummy4', 'level' => 2, 'notoc' => nil, 'title' => 'nonum section' }]

    assert_equal expected, headlines
  end
end

0 comments on commit e248f10

Please sign in to comment.