Skip to content

Commit

Permalink
EPUB: forward dir and lang attributes from the html element
Browse files Browse the repository at this point in the history
In EPUB, the <html> node of each embedded HTML file
is not included in the generated single DOM.
We now parse its dir and lang attributes and forward
them as attributes of the following <DocFragment>
element, so they become part of the DOM.
  • Loading branch information
poire-z committed Sep 14, 2019
1 parent ed2c999 commit 12e542e
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
10 changes: 9 additions & 1 deletion crengine/include/lvtinydom.h
Original file line number Diff line number Diff line change
Expand Up @@ -2456,6 +2456,10 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
lString8 headStyleText;
int headStyleState;

lString16 htmlDir;
lString16 htmlLang;
bool insideHtmlTag;

public:

/// return content of html/head/style element
Expand Down Expand Up @@ -2486,6 +2490,9 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
insideTag = false;
headStyleText.clear();
headStyleState = 0;
insideHtmlTag = false;
htmlDir.clear();
htmlLang.clear();
}
/// called on parsing end
virtual void OnStop()
Expand Down Expand Up @@ -2525,7 +2532,8 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
/// Constructor.
/// Stores parentWriter as the parent callback, keeps baseTagName and
/// baseTagReplacementName as baseTag / baseTagReplacement, and passes
/// fragmentFilePath to setCodeBase(). All state flags start cleared,
/// including insideHtmlTag.
ldomDocumentFragmentWriter( LVXMLParserCallback * parentWriter, lString16 baseTagName, lString16 baseTagReplacementName, lString16 fragmentFilePath )
    : parent(parentWriter)
    , baseTag(baseTagName)
    , baseTagReplacement(baseTagReplacementName)
    , insideTag(false)
    , styleDetectionState(0)
    , pathSubstitutions(100)
    , baseElement(NULL)
    , lastBaseElement(NULL)
    , headStyleState(0)
    , insideHtmlTag(false)
{
    setCodeBase( fragmentFilePath );
}
Expand Down
25 changes: 23 additions & 2 deletions crengine/src/lvtinydom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10043,7 +10043,15 @@ void ldomDocumentFragmentWriter::OnAttribute( const lChar16 * nsname, const lCha
parent->OnAttribute(nsname, attrname, attrvalue);
}
} else {
if ( styleDetectionState ) {
if (insideHtmlTag) {
// Grab attributes from <html dir="rtl" lang="he"> (not included in the DOM)
// to reinject them in <DocFragment>
if ( !lStr_cmp(attrname, "dir") )
htmlDir = attrvalue;
else if ( !lStr_cmp(attrname, "lang") )
htmlLang = attrvalue;
}
else if ( styleDetectionState ) {
if ( !lStr_cmp(attrname, "rel") && lString16(attrvalue).lowercase() == L"stylesheet" )
styleDetectionState |= 2;
else if ( !lStr_cmp(attrname, "type") ) {
Expand Down Expand Up @@ -10079,8 +10087,13 @@ ldomNode * ldomDocumentFragmentWriter::OnTagOpen( const lChar16 * nsname, const
} else {
if ( !lStr_cmp(tagname, "link") )
styleDetectionState = 1;
if ( !lStr_cmp(tagname, "style") )
else if ( !lStr_cmp(tagname, "style") )
headStyleState = 1;
else if ( !lStr_cmp(tagname, "html") ) {
insideHtmlTag = true;
htmlDir.clear();
htmlLang.clear();
}
}

// When meeting the <body> of each of an EPUB's embedded HTML files,
Expand Down Expand Up @@ -10115,6 +10128,11 @@ ldomNode * ldomDocumentFragmentWriter::OnTagOpen( const lChar16 * nsname, const
}
if ( !codeBasePrefix.empty() ) // add attribute <DocFragment id="..html_file_name"
parent->OnAttribute(L"", L"id", codeBasePrefix.c_str() );
if ( !htmlDir.empty() ) // add attribute <DocFragment dir="rtl" from <html dir="rtl"> tag
parent->OnAttribute(L"", L"dir", htmlDir.c_str() );
if ( !htmlLang.empty() ) // add attribute <DocFragment lang="ar" from <html lang="ar"> tag
parent->OnAttribute(L"", L"lang", htmlLang.c_str() );

parent->OnTagBody(); // inside <DocFragment>
if ( !headStyleText.empty() || stylesheetLinks.length() > 0 ) {
// add stylesheet element as child of <DocFragment>: <stylesheet href="...">
Expand Down Expand Up @@ -10178,6 +10196,9 @@ void ldomDocumentFragmentWriter::OnTagBody()
if ( insideTag ) {
parent->OnTagBody();
}
else if ( insideHtmlTag ) {
insideHtmlTag = false;
}
if ( styleDetectionState == 11 ) {
// incomplete <link rel="stylesheet", href="..." />; assuming type="text/css"
if ( !stylesheetFile.empty() )
Expand Down

0 comments on commit 12e542e

Please sign in to comment.