Skip to content

Commit

Permalink
iterate on string vs large_string
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 11, 2022
1 parent 124e275 commit 324654b
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 9 deletions.
4 changes: 3 additions & 1 deletion apis/python/src/tiledbsoma/util_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
# IMPORTANT: ALL non-primitive types supported by TileDB must be in this table.
#
pa.string(): "ascii", # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
pa.large_string(): "ascii", # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
pa.binary(): np.dtype("S"),
pa.large_binary(): np.dtype("S"),
pa.timestamp("s"): "datetime64[s]",
pa.timestamp("ms"): "datetime64[ms]",
pa.timestamp("us"): "datetime64[us]",
Expand Down Expand Up @@ -90,7 +92,7 @@ def get_arrow_type_from_tiledb_dtype(tiledb_dtype: Union[str, np.dtype]) -> pa.D
"""
if tiledb_dtype == "ascii" or tiledb_dtype.name == "bytes":
# XXX TODO: temporary workaround until UTF-8 support is native (GH #338).
return pa.string()
return pa.large_string()
else:
return pa.from_numpy_dtype(tiledb_dtype)

Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_soma_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def create_and_populate_dataframe(dataframe: soma.SOMADataFrame) -> None:
[
("foo", pa.int32()),
("bar", pa.float64()),
("baz", pa.string()),
("baz", pa.large_string()),
]
)

Expand Down Expand Up @@ -108,7 +108,7 @@ def soma_object(request, tmp_path):

elif class_name == "SOMADataFrame":
so = soma.SOMADataFrame(uri=uri)
so.create(pa.schema([("A", pa.int32()), ("B", pa.string())]))
so.create(pa.schema([("A", pa.int32()), ("B", pa.large_string())]))

elif class_name == "SOMAIndexedDataFrame":
so = soma.SOMAIndexedDataFrame(uri=uri)
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_soma_dataframe_non_indexed(tmp_path):
[
("foo", pa.int32()),
("bar", pa.float64()),
("baz", pa.string()),
("baz", pa.large_string()),
]
)
sdf.create(schema=asch)
Expand Down Expand Up @@ -120,7 +120,7 @@ def simple_soma_data_frame(tmp_path):
("soma_rowid", pa.uint64()),
("A", pa.int64()),
("B", pa.float64()),
("C", pa.string()),
("C", pa.large_string()),
]
)
sdf = t.SOMADataFrame(uri=tmp_path.as_posix())
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_soma_experiment_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def create_and_populate_obs(obs: soma.SOMADataFrame) -> soma.SOMADataFrame:
[
("foo", pa.int32()),
("bar", pa.float64()),
("baz", pa.string()),
("baz", pa.large_string()),
]
)

Expand All @@ -37,7 +37,7 @@ def create_and_populate_var(var: soma.SOMADataFrame) -> soma.SOMADataFrame:

var_arrow_schema = pa.schema(
[
("quux", pa.string()),
("quux", pa.large_string()),
("xyzzy", pa.float64()),
]
)
Expand Down
10 changes: 9 additions & 1 deletion apis/python/tests/test_soma_indexed_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ def _schema():
def test_soma_indexed_dataframe(tmp_path, arrow_schema):
sdf = t.SOMAIndexedDataFrame(uri=tmp_path.as_posix())

asch = pa.schema(
[
("foo", pa.int32()),
("bar", pa.float64()),
("baz", pa.large_string()),
]
)

# Create
asch = arrow_schema()
sdf.create(schema=asch, index_column_names=["foo"])
Expand Down Expand Up @@ -72,7 +80,7 @@ def simple_soma_indexed_data_frame(tmp_path):
("index", pa.uint64()),
("A", pa.int64()),
("B", pa.float64()),
("C", pa.string()),
("C", pa.large_string()),
]
)
index_column_names = ["index"]
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_soma_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def soma_object(request, tmp_path):

elif class_name == "SOMADataFrame":
so = soma.SOMADataFrame(uri=uri)
so.create(pa.schema([("A", pa.int32()), ("B", pa.string())]))
so.create(pa.schema([("A", pa.int32()), ("B", pa.large_string())]))

elif class_name == "SOMAIndexedDataFrame":
so = soma.SOMAIndexedDataFrame(uri=uri)
Expand Down
2 changes: 2 additions & 0 deletions apis/python/tests/test_type_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
pa.timestamp("us"),
pa.timestamp("ns"),
pa.string(),
pa.large_string(),
pa.binary(),
pa.large_binary(),
]


Expand Down

0 comments on commit 324654b

Please sign in to comment.