Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
#364: WHED appendix title handling rewritten to complement #359
Browse files Browse the repository at this point in the history
  • Loading branch information
gregoryfoster committed Apr 5, 2017
1 parent 0c650cd commit a389c4b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 16 deletions.
16 changes: 2 additions & 14 deletions interpparser/gpo_cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from regparser.tree.depth import markers as mtypes
from regparser.tree.depth import heuristics, rules
from regparser.tree.depth.derive import derive_depths
from regparser.tree.gpo_cfr.appendices import appendix_headers
from regparser.tree.gpo_cfr import appendices
from regparser.tree.struct import Node, treeify
from regparser.tree.xml_parser import matchers, tree_utils

Expand Down Expand Up @@ -292,26 +292,14 @@ def per_node(node):

def build_supplement_tree(reg_part, node):
""" Build the tree for the supplement section.

Creates the root Node for the supplement (node_type Node.INTERP, label
[reg_part, Node.INTERP_MARK], titled from the supplement's header) and
returns the result of parse_from_xml applied to that root and the
children of the XML node. """
# NOTE(review): the `title = ...` / `title=title)` lines and the
# `title=appendices.get_appendix_title(node))` line look like the removed
# and added sides of a diff shown together -- confirm which is current.
title = tree_utils.get_node_text(appendix_headers(node)[0])
root = Node(
node_type=Node.INTERP,
label=[reg_part, Node.INTERP_MARK],
title=title)
title=appendices.get_appendix_title(node))

return parse_from_xml(root, node.getchildren())


@matchers.match_tag('INTERP')
def parse_interp(parent, xml_node):
    """Build the supplement tree for xml_node and attach it to parent.

    The tree is built with parent.cfr_part as the regulation part and is
    appended to parent.children.
    """
    siblings = parent.children
    siblings.append(build_supplement_tree(parent.cfr_part, xml_node))


def get_app_title(node):
    """Extract the title text of an Appendix/Supplement section.

    The title is held in an <HD SOURCE="HED"> child or, when the section
    is reserved, in a <RESERVED> child.

    Returns the ``text`` attribute of the first matching child, so both
    cases yield the same kind of value.
    Raises IndexError when the node has neither kind of child.
    """
    titles = node.xpath("./HD[@SOURCE='HED']")
    if not titles:
        # Reserved sections keep their title in <RESERVED>. Return its text
        # rather than the element itself so callers can treat the result
        # as a string in either case.
        titles = node.xpath("./RESERVED")
    return titles[0].text
4 changes: 2 additions & 2 deletions interpparser/preprocessors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from interpparser.gpo_cfr import get_app_title
from regparser.tree.gpo_cfr import appendices

_CONTAINS_SUPPLEMENT = "contains(., 'Supplement I')"
_SUPPLEMENT_HD = "//REGTEXT//HD[@SOURCE='HD1' and {0}]".format(
Expand Down Expand Up @@ -28,6 +28,6 @@ def supplement_amdpar(xml):
def appendix_to_interp(xml):
"""Convert Supplement I APPENDIX tags to INTERP

Looks at every APPENDIX element found under xml; when its title contains
both 'Supplement' and 'Part', the element's tag is renamed to 'INTERP'
in place."""
for appendix in xml.xpath('.//APPENDIX'):
# NOTE(review): the two assignments below look like the removed and added
# sides of a diff shown together -- confirm which is current.
section_title = get_app_title(appendix)
section_title = appendices.get_appendix_title(appendix)
if 'Supplement' in section_title and 'Part' in section_title:
appendix.tag = 'INTERP'
5 changes: 5 additions & 0 deletions regparser/tree/gpo_cfr/appendices.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,13 @@ def remove_toc(appendix, letter):


def appendix_headers(node):
    """Collect the header children of an Appendix/Supplement node.

    Returns whatever node.xpath yields for the node's direct RESERVED
    children, HD children whose SOURCE is "HED", and WHED children.
    """
    header_query = './RESERVED|./HD[@SOURCE="HED"]|./WHED'
    return node.xpath(header_query)

def get_appendix_title(node):
    """Return the text of the first header found on an Appendix/Supplement.

    The headers come from appendix_headers(node); the first one is passed
    to tree_utils.get_node_text and that result is returned.
    """
    first_header = appendix_headers(node)[0]
    return tree_utils.get_node_text(first_header)


# One compiled pattern per entry of p_levels, matching a punctuation
# character, optional whitespace, and then that level's first marker
# (lvl[0]) in parentheses.
# The character class used to be written like an alternation
# ("\)\.|,|;|-|—"). Inside [...] a "|" is a literal character and "|-|" is
# a one-character range, so a plain hyphen never matched. The hyphen is now
# escaped; "|" is kept so everything that matched before still matches.
_first_markers = [re.compile(r'[\).,;|\-—]\s*\(' + lvl[0] + r'\)')
                  for lvl in p_levels]
Expand Down

0 comments on commit a389c4b

Please sign in to comment.