Skip to content

Commit

Permalink
fix(ingest/bigquery): changes helper function to decode unicode escape…
Browse files Browse the repository at this point in the history
… sequences
  • Loading branch information
PatrickfBraz committed Jul 3, 2024
1 parent 80e5a64 commit 0eb948e
Showing 1 changed file with 9 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ def unquote_and_decode_unicode_escape_seq(
if string.startswith(leading_quote) and string.endswith(trailing_quote):
string = string[1:-1]

cleaned_string = string.encode().decode("unicode-escape")

return cleaned_string
# Decode Unicode escape sequences. This avoids issues with encoding
while string.find("\\u") >= 0:
index = string.find("\\u") # The first occurrence of the substring
unicode_seq = string[index: (index + 6)] # The Unicode escape sequence
# Replace the Unicode escape sequence with the decoded character
string = string.replace(
unicode_seq, unicode_seq.encode("utf-8").decode("unicode-escape")
)
return string


def parse_labels(labels_str: str) -> Dict[str, str]:
Expand Down

0 comments on commit 0eb948e

Please sign in to comment.