Skip to content

Commit

Permalink
Fix a bug in parse_tr()
Browse files Browse the repository at this point in the history
  • Loading branch information
mikeqfu committed Nov 11, 2022
1 parent e865cf8 commit 3348c9c
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions pyrcs/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,20 @@


 def _parse_other_tags_in_td_contents(td_content):
-    if not isinstance(td_content, str):
+    if isinstance(td_content, str):
+        td_text = td_content
+
+    else:
+        tag_name = td_content.name
         td_text = td_content.get_text()

-        if td_content.name == 'em':
+        if tag_name == 'em':
             td_text = f'[{td_text}]'
-        elif td_content.name == 'q':
+        elif tag_name == 'q':
             td_text = f'"{td_text}"'
-
-    else:
-        td_text = td_content
+        elif tag_name == 'span':
+            if td_content.get('class') == ['r']:
+                td_text = f'\t\t{td_text}'

     return td_text

Expand Down Expand Up @@ -90,6 +94,8 @@ def parse_tr(trs, ths, as_dataframe=False):

 for td_no, td in enumerate(tds):
     text = ''.join([_parse_other_tags_in_td_contents(x) for x in td.contents])
+    if text.startswith('\t\t('):
+        text = text[text.find(')') + 1:]+text[:text.find(')') + 1]
     # if '/\r\n' in text or '\r\n' in text:
     # txt = text.replace('/\r\n', ' / ').replace('\r\n', ' / ')
     # elif '\n' in text:
Expand Down

0 comments on commit 3348c9c

Please sign in to comment.