Skip to content

Commit

Permalink
Merge pull request #804 from metanorma/fix/url-encode-urls
Browse files Browse the repository at this point in the history
ensure URIs in links are correctly URI-encoded rather than incorrectl…
  • Loading branch information
opoudjis authored Aug 2, 2023
2 parents ab2ba9a + 09ecc66 commit 70126fb
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 224 deletions.
1 change: 1 addition & 0 deletions lib/metanorma/standoc/cleanup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def cleanup(xmldoc)
empty_element_cleanup(xmldoc)
img_cleanup(xmldoc)
anchor_cleanup(xmldoc)
link_cleanup(xmldoc)
xmldoc
end

Expand Down
21 changes: 17 additions & 4 deletions lib/metanorma/standoc/cleanup_inline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def concept_cleanup(xmldoc)
def concept_cleanup1(elem)
elem.children.remove if elem&.children&.text&.strip&.empty?
key_extract_locality(elem)
if /:/.match?(elem["key"]) then concept_termbase_cleanup(elem)
if elem["key"].include?(":") then concept_termbase_cleanup(elem)
elsif refid? elem["key"] then concept_eref_cleanup(elem)
else concept_xref_cleanup(elem)
end
Expand All @@ -95,15 +95,14 @@ def related_cleanup(xmldoc)
end

# Split a locality suffix off the element's "key" attribute.
#
# When the key contains a comma, the text after the first comma is appended
# to the element as a <locality> child, and the key is truncated to the text
# before the first comma. Keys without a comma (or a missing key) are left
# untouched.
#
# @param elem element responding to [], []= and add_child
def key_extract_locality(elem)
  # single, nil-safe guard: a missing key attribute is treated as "no comma"
  elem["key"]&.include?(",") or return
  elem.add_child("<locality>#{elem['key'].sub(/^[^,]+,/, '')}</locality>")
  elem["key"] = elem["key"].sub(/,.*$/, "")
end

# Rewrite a concept whose key has the form "termbase:target" as a <termref>.
#
# Any <xrefrender> child is detached from the element and its children are
# re-emitted as the content of the new <termref>. The key is split on the
# first colon only, so the target part may itself contain colons.
#
# @param elem element responding to [], at and add_child
def concept_termbase_cleanup(elem)
  rendering = elem.at("./xrefrender")&.remove&.children
  termbase, key = elem["key"].split(":", 2)
  # a nil rendering interpolates as an empty string, giving an empty termref
  elem.add_child(
    %(<termref base="#{termbase}" target="#{key}">#{rendering&.to_xml}</termref>),
  )
end
Expand Down Expand Up @@ -207,6 +206,20 @@ def passthrough_cleanup(doc)
end
end

# Ensure the target attribute of every <link> element is a validly
# percent-encoded URI.
#
# Only characters that may not appear literally in a URI are encoded
# (space, non-ASCII, <, >, ", etc., and any "%" that does not start a
# two-hex-digit escape). URI delimiters such as ":", "/", "?", "#", "&",
# "=" and "@" are left alone, so ports, query strings, fragments and
# mailto: targets survive intact. Existing %XX escapes are preserved rather
# than decoded and re-encoded, so an escaped delimiter such as %2F or %26
# keeps its meaning and the operation is idempotent.
#
# Empty targets are skipped.
#
# @param xmldoc document responding to xpath; matched elements must respond
#   to [] and []=
def link_cleanup(xmldoc)
  xmldoc.xpath("//link[@target]").each do |l|
    target = l["target"].to_s
    target.empty? and next

    # first alternative: a stray "%" that is not a valid escape;
    # second alternative: anything outside the unreserved + reserved sets
    l["target"] =
      target.gsub(%r{%(?!\h\h)|[^A-Za-z0-9\-._~:/?\#\[\]@!$&'()*+,;=%]}) do |ch|
        ch.bytes.map { |b| format("%%%02X", b) }.join
      end
  end
end

# Percent-encode a single URI component (e.g. one path segment).
#
# CGI.escape applies form-encoding rules, which write a space as "+" and a
# literal plus sign as "%2B"; every "+" left in its output therefore stands
# for a space and is rewritten as "%20".
#
# @param comp [String] the raw component
# @return [String] the percent-encoded component
def uri_component_encode(comp)
  form_encoded = CGI.escape(comp)
  form_encoded.gsub(/\+/) { "%20" }
end

private

# skip ZWNJ inserted to prevent regexes operating in asciidoctor
Expand Down
Loading

0 comments on commit 70126fb

Please sign in to comment.