Skip to content

Commit

Permalink
Merge pull request #8 from glencoesoftware/queries
Browse files Browse the repository at this point in the history
Add query support
  • Loading branch information
emilroz authored Nov 17, 2023
2 parents a2158f5 + 29c9081 commit f99cc8e
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 4 deletions.
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,36 @@ with omero2pandas.OMEROConnection(server='my.server', port=4064,
```

The context manager will handle session creation and cleanup automatically.

### Querying tables

You can also supply [PyTables condition syntax](https://www.pytables.org/usersguide/condition_syntax.html) to the `read_table` and `download_table` functions.
Returned tables will only include rows which pass this filter.

**Basic syntax**

Select rows representing objects with area greater than 20:
```python
omero2pandas.read_table(file_id=10, query='(area>20)')
```

**Multiple conditions**

Select rows representing objects with an even ID number lower than 50:
```python
omero2pandas.read_table(file_id=10, query='(id%2==0) & (id<50)')
```

**Complex conditions**

Select rows representing objects which originated from an ROI named 'Nucleus':
```python
omero2pandas.read_table(file_id=10, query='x=="Nucleus"', variables={'x': omero.rtypes.rstring('Roi Name')})
```

N.B. Column names containing spaces aren't supported by the native syntax, but they can be referenced through placeholders supplied via the `variables` parameter.

The variables map needs to be a dictionary mapping string variables to [OMERO rtypes](https://omero.readthedocs.io/en/v5.6.9/developers/GettingStarted/AdvancedClientDevelopment.html#rtypes) objects rather than raw Python objects.
These should match the relevant column type. Mapped variables are substituted into the query during processing.

A `variables` map usually isn't needed for simple queries. The basic condition string should automatically get converted to a meaningful type, but when this fails,
replacing tricky elements with a variable may help.
38 changes: 34 additions & 4 deletions omero2pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_table_columns(file_id=None, annotation_id=None,

def read_table(file_id=None, annotation_id=None, column_names=(), rows=None,
chunk_size=1000, omero_connector=None, server=None, port=4064,
username=None, password=None):
username=None, password=None, query=None, variables=None):
"""
Gets table data from the server.
Supply either a file or annotation ID.
Expand All @@ -110,8 +110,16 @@ def read_table(file_id=None, annotation_id=None, column_names=(), rows=None,
Default None = load all rows.
:param column_names: Iterable of column name strings to load.
Default None = load all columns.
:param query: String containing the PyTables query which would return a
subset of rows from the table. Only rows which pass this query will be
returned. Cannot be used with the 'rows' parameter.
:param variables: Dictionary containing variables to map onto the query
string.
:return: pandas.DataFrame object containing requested data
"""
if rows is not None and query is not None:
raise ValueError("Running a query supersedes the rows argument. "
"Please only supply one.")
object_id, object_type = _validate_requested_object(
file_id=file_id, annotation_id=annotation_id)

Expand All @@ -134,7 +142,14 @@ def read_table(file_id=None, annotation_id=None, column_names=(), rows=None,
else:
target_cols = range(len(heads))
# Determine requested rows
if rows is None:
if query is not None:
if variables is None:
variables = {}
rows = data_table.getWhereList(condition=query,
variables=variables,
start=0, stop=-1, step=1)
num_rows = len(rows)
elif rows is None:
num_rows = data_table.getNumberOfRows()
else:
rows = list(rows)
Expand Down Expand Up @@ -203,7 +218,7 @@ def upload_table(dataframe, table_name, parent_id, parent_type='Image',
def download_table(target_path, file_id=None, annotation_id=None,
column_names=(), rows=None, chunk_size=1000,
omero_connector=None, server=None, port=4064,
username=None, password=None):
username=None, password=None, query=None, variables=None):
"""
Downloads table data into a CSV file.
Supply either a file or annotation ID.
Expand All @@ -225,8 +240,16 @@ def download_table(target_path, file_id=None, annotation_id=None,
Default None = load all rows.
:param column_names: Iterable of column name strings to load.
Default None = load all columns.
:param query: String containing the PyTables query which would return a
subset of rows from the table. Only rows which pass this query will be
returned. Cannot be used with the 'rows' parameter.
:param variables: Dictionary containing variables to map onto the query
string.
:return: pandas.DataFrame object containing requested data
"""
if rows is not None and query is not None:
raise ValueError("Running a query supersedes the rows argument. "
"Please only supply one.")
object_id, object_type = _validate_requested_object(
file_id=file_id, annotation_id=annotation_id)

Expand All @@ -253,7 +276,14 @@ def download_table(target_path, file_id=None, annotation_id=None,
else:
target_cols = range(len(heads))
# Determine requested rows
if rows is None:
if query is not None:
if variables is None:
variables = {}
rows = data_table.getWhereList(condition=query,
variables=variables,
start=0, stop=-1, step=1)
num_rows = len(rows)
elif rows is None:
num_rows = data_table.getNumberOfRows()
else:
rows = list(rows)
Expand Down

0 comments on commit f99cc8e

Please sign in to comment.