debug [skip-ci]
johnkerl committed May 8, 2024
1 parent 51f60de commit d61f2d1
Showing 4 changed files with 117 additions and 1 deletion.
55 changes: 54 additions & 1 deletion apis/python/src/tiledbsoma/_arrow_types.py
@@ -263,17 +263,63 @@ def df_to_arrow(df: pd.DataFrame) -> pa.Table:
    # will later be appending, or updating, with null data.
    # * Note that Arrow has a per-field nullable flag in its schema metadata
    # -- and so do TileDB array schemas.
    print()
    print()
    print("DF")
    print(df.dtypes)
    print()
    print()

    print()
    print()
    print("NF1")
    print(nullable_fields)
    print()
    print()

    for key in df:
        # (removed -- the file's one deletion): if df[key].dtype.name == "object":
        print(">>>>", key, df[key].dtype.name)
        if df[key].dtype.name in ["object", "string"]:
            print("]]]]", key, "YES")
            nullable_fields.add(key)
        else:
            print("]]]]", key, "NO")

    print()
    print()
    print("DF")
    print(df)
    print()
    print()

    print()
    print()
    print("NF2")
    print(nullable_fields)
    print()
    print()

    arrow_table = pa.Table.from_pandas(df)

    print()
    print()
    print("AT1")
    print(arrow_table.schema)
    print()
    print()

    if nullable_fields:
        md = arrow_table.schema.metadata
        md.update(dict.fromkeys(nullable_fields, "nullable"))
        arrow_table = arrow_table.replace_schema_metadata(md)

    print()
    print()
    print("AT2")
    print(arrow_table.schema)
    print()
    print()

    # For tiledbsoma.io (for which this method exists) _any_ dataset can be appended to
    # later on. This means that on fresh ingest we must use a larger bit-width than
    # the bare minimum necessary.
@@ -297,6 +343,13 @@ def df_to_arrow(df: pd.DataFrame) -> pa.Table:

    arrow_table = pa.Table.from_pandas(df, schema=new_schema)

    print()
    print()
    print("AT3")
    print(arrow_table.schema)
    print()
    print()

    return arrow_table
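
The hunks above record which columns should be treated as nullable by stashing their names in the Arrow schema metadata rather than flipping each field's nullable flag. A minimal standalone sketch of that pattern (column names and values are invented for illustration; this is not the production code path):

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"soma_joinid": [0, 1, 2], "batch_id": ["a", None, "c"]})

# Same test as the loop above: object/string columns are flagged as nullable.
nullable_fields = {key for key in df if df[key].dtype.name in ["object", "string"]}

arrow_table = pa.Table.from_pandas(df)

# Record the nullable columns as schema-level metadata.
if nullable_fields:
    md = dict(arrow_table.schema.metadata or {})
    md.update(dict.fromkeys(nullable_fields, "nullable"))
    arrow_table = arrow_table.replace_schema_metadata(md)

print(arrow_table.schema.metadata)  # should now include b'batch_id': b'nullable'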


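The bit-width comment in the second hunk explains why a new schema is built before the final from_pandas call: any ingested dataset may be appended to later, so small integer and float columns are stored wider than the bare minimum. A sketch of that idea, using a hypothetical widening map (the actual rule lives in the part of the diff elided above):

import pandas as pd
import pyarrow as pa

df = pd.DataFrame(
    {
        "soma_joinid": pd.Series([0, 1, 2], dtype="int32"),
        "value": pd.Series([1.5, 2.5, 3.5], dtype="float32"),
    }
)

narrow = pa.Table.from_pandas(df)

# Illustrative widening only -- not necessarily the mapping tiledbsoma applies.
widen = {
    pa.int8(): pa.int64(),
    pa.int16(): pa.int64(),
    pa.int32(): pa.int64(),
    pa.float32(): pa.float64(),
}
new_fields = [f.with_type(widen.get(f.type, f.type)) for f in narrow.schema]
new_schema = pa.schema(new_fields, metadata=narrow.schema.metadata)

arrow_table = pa.Table.from_pandas(df, schema=new_schema)
print(arrow_table.schema)  # soma_joinid: int64, value: double
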
7 changes: 7 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
@@ -445,6 +445,13 @@ def write(

        clib_dataframe = self._handle._handle

        print()
        print()
        print("VSDF SELF.SCHEMA")
        print(self.schema)
        print()
        print()

        values = _util.cast_values_to_target_schema(clib_dataframe, values, self.schema)

        for batch in values.to_batches():
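
The hunk above dumps self.schema wholesale right before the values are cast. When the question is specifically about nullability, a small throwaway helper (hypothetical, not part of this commit) can make the per-field flags easier to scan than the raw schema repr:

import pyarrow as pa

def dump_schema(schema: pa.Schema, label: str) -> None:
    # One line per field: name, Arrow type, and the per-field nullable flag.
    print(f"== {label} ==")
    for field in schema:
        print(f"  {field.name}: {field.type} nullable={field.nullable}")
    if schema.metadata:
        print(f"  metadata keys: {sorted(schema.metadata)}")

# Example with a made-up schema:
dump_schema(
    pa.schema(
        [
            pa.field("soma_joinid", pa.int64(), nullable=False),
            pa.field("batch_id", pa.large_string(), nullable=True),
        ]
    ),
    "VSDF SELF.SCHEMA",
)
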
22 changes: 22 additions & 0 deletions apis/python/src/tiledbsoma/_util.py
@@ -378,8 +378,30 @@ def cast_values_to_target_schema(
            target_schema.append(target_field.with_type(pa.uint8()))
        else:
            target_schema.append(target_field)

    print()
    print()
    print("CCCC1")
    print(target_schema)
    print()
    print()

    new_schema = pa.schema(target_schema, values.schema.metadata)

    print()
    print()
    print("CCCC2a")
    print(values)
    print()
    print()

    print()
    print()
    print("CCCC2b")
    print(new_schema)
    print()
    print()

    return values.cast(new_schema)


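cast_values_to_target_schema rebuilds the target schema field by field (for example re-typing bool as uint8, as in the context lines above), carries over the incoming table's schema metadata, and then casts the whole table in one call. A self-contained sketch of that final step, with toy columns chosen only for illustration:

import pyarrow as pa

values = pa.table(
    {
        "flag": pa.array([True, False, None]),
        "count": pa.array([1, 2, 3], type=pa.int32()),
    }
)

# Re-type bool -> uint8, mirroring the loop above; leave other fields alone.
target_schema = []
for field in values.schema:
    if field.type == pa.bool_():
        target_schema.append(field.with_type(pa.uint8()))
    else:
        target_schema.append(field)

new_schema = pa.schema(target_schema, metadata=values.schema.metadata)
print(values.cast(new_schema).schema)  # flag: uint8, count: int32
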
34 changes: 34 additions & 0 deletions apis/python/tests/test_update_dataframes.py
@@ -252,3 +252,37 @@ def test_change_counts(
    verify_updates(experiment_path, new_obs2, new_var2, exc=ValueError)
    verify_obs_and_var_eq(old_anndata, new_anndata)
    verify_schemas(experiment_path, obs_schema, var_schema)


# @pytest.mark.parametrize("multiple_fixtures_with_readback", [False, True], indirect=True)
@pytest.mark.parametrize("multiple_fixtures_with_readback", [False], indirect=True)
# @pytest.mark.parametrize("multiple_fixtures_with_readback", [True], indirect=True)
def test_update_non_null_to_null(exp_path, new_obs, new_var, o1, v1):
    print()
    print()
    print("NEW_OBS1")
    print(new_obs)
    print()
    print()

    new_obs["batch_id"] = "testing"

    print()
    print()
    print("NEW_OBS2")
    print(new_obs)
    print()
    print()

    verify_updates(exp_path, new_obs, new_var)

    new_obs["batch_id"] = pd.NA

    print()
    print()
    print("NEW_OBS3")
    print(new_obs)
    print()
    print()

    verify_updates(exp_path, new_obs, new_var)
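
The new test drives obs["batch_id"] from a non-null string to pd.NA and expects both updates to round-trip. The conversion it ultimately leans on, shown in isolation on a toy frame rather than the fixture data:

import pandas as pd
import pyarrow as pa

obs = pd.DataFrame({"batch_id": ["testing"] * 3})
obs["batch_id"] = pd.NA  # every value becomes missing

table = pa.Table.from_pandas(obs, preserve_index=False)
print(table.column("batch_id").null_count)  # 3 -- pd.NA maps to Arrow nulls
print(table.schema.field("batch_id").type)  # typically inferred as null for an all-missing object column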
