Skip to content

Commit

Permalink
Some more arrowload info messages
Browse files Browse the repository at this point in the history
  • Loading branch information
rebkwok committed Jun 16, 2023
1 parent c080713 commit 3651149
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions python_scripts/load_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ def make_vars(self, pre_processed_column_types, batch):
# This is the first batch
# Loop over the variable name / variable type mappings and add
# variables with the appropriate typing based on the vartype
self.display(f"Creating variables: {', '.join(self.column_types.keys())}")
for varname, vartype in self.column_types.items():
if vartype == "string":
max_length = max(len(val.as_py()) for (val) in batch[varname])
Expand Down Expand Up @@ -462,7 +463,10 @@ def make_vars(self, pre_processed_column_types, batch):
if self.column_types[col] != pre_processed_column_types[col]
}
for changed_col, changed_type in changed_cols.items():
self.display(f"Converting {changed_cols}")
self.display(
f"Converting '{changed_col}' from {pre_processed_column_types[changed_col]}"
" to {changed_type}"
)
if changed_type == "string":
# If the variable has changed in a subsequent batch to string type, it
# means it was previously considered integer type and is now too big to
Expand Down Expand Up @@ -524,12 +528,13 @@ def replace_stata_missing_and_recast(self):
"""
# for byte/int/long variables, replace the missing value with stata missing and recast
# the variable to the type we expect it to be
self.display("Finalising missing values for integer-type columns...")
column_type_mappings = {"boolean": "byte", "date": "long"}
for column_name, column_type in self.column_types.items():
if column_type not in ["boolean", "byte", "int", "long", "date"]:
continue
column_type = column_type_mappings.get(column_type, column_type)
self.display(f"Finalising column '{column_name}' (type ({column_type})")
self.display(f"- column '{column_name}' (type {column_type})")
self.run_stata_command(
f"replace {column_name} = . if {column_name} == {self.MISSING_VALUES[column_type]}"
)
Expand Down

0 comments on commit 3651149

Please sign in to comment.