Skip to content

Commit

Permalink
Modify the functions parse_tr() and parse_date() in utils.py with bug fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
mikeqfu committed Jan 7, 2021
1 parent 023ef1a commit e9b0815
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions pyrcs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Utilities - Helper functions.
"""

import calendar
import collections
import datetime
import os
Expand All @@ -18,6 +19,7 @@
import requests
from pyhelpers.ops import confirmed, fake_requests_headers
from pyhelpers.store import load_json, load_pickle, save_json, save_pickle
from pyhelpers.text import find_similar_str


# -- Specification of resource homepage ------------------------------------------------
Expand Down Expand Up @@ -407,7 +409,6 @@ def parse_tr(header, trs):
row_spanned = []
for no, tr in enumerate(trs):
for td_no, rho in enumerate(tr.find_all('td')):
# print(data.has_attr("rowspan"))
if rho.has_attr('rowspan'):
row_spanned.append((no, int(rho['rowspan']), td_no, rho.text))

Expand All @@ -422,7 +423,7 @@ def parse_tr(header, trs):
for y in to_repeat:
for j in range(1, y[0]):
if y[2] in tbl_lst[i] and y[2] != '\xa0':
y[1] += np.abs(tbl_lst[i].index(y[2]) - y[1])
y[1] += np.abs(tbl_lst[i].index(y[2]) - y[1], dtype='int64')
tbl_lst[i + j].insert(y[1], y[2])

# if row_spanned:
Expand Down Expand Up @@ -618,8 +619,13 @@ def parse_date(str_date, as_date_type=False):
<class 'datetime.date'>
"""

temp_date = dateutil.parser.parse(str_date, fuzzy=True)
# or, temp_date = datetime.strptime(last_update_date[12:], '%d %B %Y')
try:
temp_date = dateutil.parser.parse(str_date, fuzzy=True)
# or, temp_date = datetime.datetime.strptime(str_date[12:], '%d %B %Y')
except (TypeError, calendar.IllegalMonthError):
month_name = find_similar_str(str_date, calendar.month_name)
err_month_ = find_similar_str(month_name, str_date.split(' '))
temp_date = dateutil.parser.parse(str_date.replace(err_month_, month_name), fuzzy=True)

parsed_date = temp_date.date() if as_date_type else str(temp_date.date())

Expand Down

0 comments on commit e9b0815

Please sign in to comment.