Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge pull request #367 from gregoryfoster/37_cfr_1_appendix_title
Browse files Browse the repository at this point in the history
#364: WHED appendix title handling rewritten to complement #359
  • Loading branch information
cmc333333 authored Apr 6, 2017
2 parents 0c650cd + e06cee9 commit 89535d0
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 18 deletions.
18 changes: 3 additions & 15 deletions interpparser/gpo_cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from regparser.tree.depth import markers as mtypes
from regparser.tree.depth import heuristics, rules
from regparser.tree.depth.derive import derive_depths
from regparser.tree.gpo_cfr.appendices import appendix_headers
from regparser.tree.gpo_cfr import appendices
from regparser.tree.struct import Node, treeify
from regparser.tree.xml_parser import matchers, tree_utils

Expand Down Expand Up @@ -291,27 +291,15 @@ def per_node(node):


def build_supplement_tree(reg_part, node):
    """Build the tree for the supplement section.

    :param reg_part: value used as the first element of the root label
    :param node: XML element of the supplement; its headers supply the
        title and its children are handed to ``parse_from_xml``
    :returns: whatever ``parse_from_xml`` returns for the new root node
    """
    # A single title lookup through the shared helper; the root is an
    # interpretation node labelled [reg_part, Node.INTERP_MARK].
    root = Node(
        node_type=Node.INTERP,
        label=[reg_part, Node.INTERP_MARK],
        title=appendices.get_appendix_title(node))

    return parse_from_xml(root, node.getchildren())


@matchers.match_tag('INTERP')
def parse_interp(parent, xml_node):
    """Build the supplement tree for ``xml_node`` and append it to
    ``parent.children``. The tree is built with ``parent.cfr_part``."""
    siblings = parent.children
    siblings.append(build_supplement_tree(parent.cfr_part, xml_node))


def get_app_title(node):
    """ Appendix/Supplement sections have the title in an HD tag, or
    if they are reserved, in a <RESERVED> tag. Extract the title.

    :param node: element exposing ``xpath``; queried for
        ``./HD[@SOURCE='HED']`` first and ``./RESERVED`` as a fallback
    :returns: the ``.text`` of the first matching element
    :raises IndexError: when neither kind of child is present
    """
    titles = node.xpath("./HD[@SOURCE='HED']")
    if not titles:
        # Fall back to the reserved marker. Return its text, not the
        # element, so both branches yield the same type and callers can
        # run substring checks on the result.
        titles = node.xpath("./RESERVED")
    return titles[0].text
6 changes: 3 additions & 3 deletions interpparser/preprocessors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from interpparser.gpo_cfr import get_app_title
from regparser.tree.gpo_cfr import appendices

_CONTAINS_SUPPLEMENT = "contains(., 'Supplement I')"
_SUPPLEMENT_HD = "//REGTEXT//HD[@SOURCE='HD1' and {0}]".format(
Expand Down Expand Up @@ -26,8 +26,8 @@ def supplement_amdpar(xml):


def appendix_to_interp(xml):
    """Convert Supplement I APPENDIX tags to INTERP.

    Every ``APPENDIX`` descendant of ``xml`` whose title contains both
    'Supplement' and 'Part' is retagged in place as ``INTERP``. Nothing is
    returned.
    """
    for appendix in xml.xpath('.//APPENDIX'):
        section_title = appendices.get_appendix_title(appendix)
        if 'Supplement' in section_title and 'Part' in section_title:
            appendix.tag = 'INTERP'
6 changes: 6 additions & 0 deletions regparser/tree/gpo_cfr/appendices.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,15 @@ def remove_toc(appendix, letter):


def appendix_headers(node):
    """ Return the direct children of ``node`` that act as headers: RESERVED
    tags, HD tags with SOURCE="HED", and WHED tags. """
    header_query = './RESERVED|./HD[@SOURCE="HED"]|./WHED'
    return node.xpath(header_query)


def get_appendix_title(node):
    """ Return the text of the first header found by ``appendix_headers``
    for this Appendix/Supplement node. Indexing fails if no header is
    found. """
    headers = appendix_headers(node)
    first_header = headers[0]
    return tree_utils.get_node_text(first_header)


# One compiled pattern per entry of p_levels, built around that entry's first
# item (lvl[0], inserted into the pattern unescaped): a single character from
# the bracketed class, optional whitespace, then "(<lvl[0]>)".
# NOTE(review): inside the character class "|" is a literal and "|-|" reads as
# a one-character range, so a plain "-" does not appear to be matched while
# "|" is -- confirm whether that is intended.
_first_markers = [re.compile(r'[\)\.|,|;|-|—]\s*\(' + lvl[0] + r'\)')
for lvl in p_levels]

Expand Down

0 comments on commit 89535d0

Please sign in to comment.