Skip to content

Commit

Permalink
Merge pull request #12941 from bernt-matthias/topic/data-table-filter
Browse files Browse the repository at this point in the history
Dynamic options: add data table filter
  • Loading branch information
mvdbeek authored Nov 22, 2024
2 parents cc36fad + db1b370 commit 363ab53
Show file tree
Hide file tree
Showing 6 changed files with 392 additions and 8 deletions.
186 changes: 185 additions & 1 deletion lib/galaxy/tool_util/linters/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import ast
import re
import warnings
from copy import deepcopy
from typing import (
Iterator,
Optional,
Expand Down Expand Up @@ -130,7 +131,6 @@
]

# TODO lint for valid param type - attribute combinations
# TODO check if dataset is available for filters referring other datasets
# TODO check if ref input param is present for from_dataset


Expand Down Expand Up @@ -490,6 +490,190 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
)


# Attributes that have to be present on a ``<filter>`` element, keyed by the
# value of the filter's ``type`` attribute.
FILTER_REQUIRED_ATTRIBUTES = dict(
    # column needs special treatment
    data_meta=["type", "ref", "key"],
    param_value=["type", "ref", "column"],
    static_value=["type", "column", "value"],
    regexp=["type", "column", "value"],
    unique_value=["type", "column"],
    multiple_splitter=["type", "column"],
    attribute_value_splitter=["type", "column"],
    add_value=["type", "value"],
    # this is handled separately in InputsOptionsRemoveValueFilterRequiredAttributes
    remove_value=["type"],
    sort_by=["type", "column"],
    data_table=["type", "column", "table_name", "data_table_column"],
)


class InputsOptionsFiltersRequiredAttributes(Linter):
    """
    Check that option filters specify all attributes required for their type.

    For each ``<filter>`` below a parameter's ``<options>`` whose ``type`` is a
    key of ``FILTER_REQUIRED_ATTRIBUTES`` an error is reported for every
    required attribute that is missing. Filters without a ``type`` or with a
    type that is not listed there are skipped.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return
        for param, param_name in _iter_param(tool_xml):
            # findall yields nothing if the parameter has no <options> child,
            # so no separate check for the options element is needed
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                # look up the same mapping that is iterated below; a missing
                # or unknown type has no entry and is skipped
                required_attributes = FILTER_REQUIRED_ATTRIBUTES.get(filter_type)
                if required_attributes is None:
                    continue
                for attrib in required_attributes:
                    if attrib not in filter_elem.attrib:
                        lint_ctx.error(
                            f"Select parameter [{param_name}] '{filter_type}' filter misses required attribute '{attrib}'",
                            node=filter_elem,
                        )


class InputsOptionsRemoveValueFilterRequiredAttributes(Linter):
    """
    Check the required attributes of ``remove_value`` filters.

    Considering only the attributes ``value``, ``ref``, ``meta_ref`` and
    ``key``, a ``remove_value`` filter has to specify exactly one of these
    combinations: ``value`` alone, ``ref`` alone, or ``meta_ref`` together
    with ``key``. Any other combination is reported as an error.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        selector_attributes = ("value", "ref", "meta_ref", "key")
        valid_combinations = (
            frozenset({"value"}),
            frozenset({"ref"}),
            frozenset({"meta_ref", "key"}),
        )

        for param, param_name in _iter_param(tool_xml):
            # findall yields nothing if the parameter has no <options> child
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                # remove_value needs more logic than a plain list of required attributes
                if filter_type != "remove_value":
                    continue
                present = frozenset(attrib for attrib in selector_attributes if attrib in filter_elem.attrib)
                if present in valid_combinations:
                    continue
                lint_ctx.error(
                    f"Select parameter [{param_name}] '{filter_type}' filter needs either the 'value'; 'ref'; or 'meta_ref' and 'key' attribute(s)",
                    node=filter_elem,
                )


# Attributes that may be given in addition to the required ones, keyed by the
# value of the filter's ``type`` attribute.
_FILTER_OPTIONAL_ATTRIBUTES = {
    "static_value": ["keep"],
    "regexp": ["keep"],
    "data_meta": ["column", "multiple", "separator"],
    "param_value": ["keep", "ref_attribute"],
    "multiple_splitter": ["separator"],
    "attribute_value_splitter": ["pair_separator", "name_val_separator"],
    "add_value": ["name", "index"],
    "remove_value": ["value", "ref", "meta_ref", "key"],
    "data_table": ["keep"],
}

# All attributes a filter of a given type may carry: the required attributes
# followed by the optional ones. The deep copy keeps the lists of
# FILTER_REQUIRED_ATTRIBUTES untouched.
FILTER_ALLOWED_ATTRIBUTES = deepcopy(FILTER_REQUIRED_ATTRIBUTES)
for _filter_type, _optional_attributes in _FILTER_OPTIONAL_ATTRIBUTES.items():
    FILTER_ALLOWED_ATTRIBUTES[_filter_type].extend(_optional_attributes)


class InputsOptionsFiltersAllowedAttributes(Linter):
    """
    Warn about filter attributes that are not allowed for the filter's type.

    Filters without a ``type`` or with a type that is not a key of
    ``FILTER_ALLOWED_ATTRIBUTES`` are skipped.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        for param, param_name in _iter_param(tool_xml):
            # findall yields nothing if the parameter has no <options> child
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                # a missing type (None) is never a key of the mapping
                if filter_type not in FILTER_ALLOWED_ATTRIBUTES:
                    continue
                allowed_attributes = FILTER_ALLOWED_ATTRIBUTES[filter_type]
                unexpected = [attrib for attrib in filter_elem.attrib if attrib not in allowed_attributes]
                for attrib in unexpected:
                    lint_ctx.warn(
                        f"Select parameter [{param_name}] '{filter_type}' filter specifies unnecessary attribute '{attrib}'",
                        node=filter_elem,
                    )


class InputsOptionsRegexFilterExpression(Linter):
    """
    Check that the ``value`` of ``regexp`` filters compiles as a regular expression.

    ``regexp`` filters without a ``value`` attribute are not reported here.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        for param, param_name in _iter_param(tool_xml):
            # findall yields nothing if the parameter has no <options> child
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                if filter_type != "regexp":
                    continue
                expression = filter_elem.attrib.get("value")
                if expression is None:
                    continue
                try:
                    re.compile(expression)
                except re.error as re_error:
                    lint_ctx.error(
                        f"Select parameter [{param_name}] '{filter_type}'' filter 'value' is not a valid regular expression ({re_error})'",
                        node=filter_elem,
                    )


class InputsOptionsFiltersCheckReferences(Linter):
    """
    Check that ``ref`` and ``meta_ref`` attributes of filters name an existing parameter.

    Only filters that have a ``type`` attribute are checked.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        # names of all parameters a filter may refer to
        known_names = set()
        for _, name in _iter_param(tool_xml):
            known_names.add(name)

        for param, param_name in _iter_param(tool_xml):
            # findall yields nothing if the parameter has no <options> child
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                if filter_type is None:
                    continue
                # check for references to other inputs
                # TODO: currently ref and metaref seem only to work for top level params,
                #       once this is fixed the linter needs to be extended, e.g. `f.attrib[ref_attrib].split('|')[-1]`
                for ref_attrib in ("meta_ref", "ref"):
                    if ref_attrib not in filter_elem.attrib:
                        continue
                    referenced = filter_elem.attrib[ref_attrib]
                    if referenced in known_names:
                        continue
                    lint_ctx.error(
                        f"Select parameter [{param_name}] '{filter_type}'' filter attribute '{ref_attrib}' refers to non existing parameter '{referenced}'",
                        node=filter_elem,
                    )


class InputsDataOptionsFiltersRef(Linter):
"""
Lint for set ref for filters of data parameters
Expand Down
18 changes: 16 additions & 2 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -5650,8 +5650,9 @@ Currently the following filters are defined:
* ``data_meta`` populate or filter options based on the metadata of another input parameter specified by ``ref``. If a ``column`` is given options are filtered for which the entry in this column ``column`` is equal to metadata of the input parameter specified by ``ref``.
If no ``column`` is given the metadata value of the referenced input is added to the options list (in this case the corresponding ``options`` tag must not have the ``from_data_table`` or ``from_dataset`` attributes).
In both cases the desired metadata is selected by ``key``.
* ``data_table``: filter options according to the entries of a data table. By default only options whose value in ``column`` appears in column ``data_table_column`` of the data table ``table_name`` are kept. Setting ``keep`` to ``false`` removes these options instead.
The ``static_value`` and ``regexp`` filters can be inverted by setting ``keep`` to true.
The ``static_value``, ``regexp``, and ``data_table`` filters can be inverted by setting ``keep`` to true.
* ``add_value``: add an option with a given ``name`` and ``value`` to the options. By default, the new option is appended, with ``index`` the insertion position can be specified.
* ``remove_value``: remove a value from the options. Either specified explicitly with ``value``, the value of another input specified with ``ref``, or the metadata ``key`` of another input ``meta_ref``.
Expand Down Expand Up @@ -5825,7 +5826,7 @@ only used if ``multiple`` is set to ``true``.]]></xs:documentation>
<xs:annotation>
<xs:documentation xml:lang="en">If ``true``, keep columns matching the
value, if ``false`` discard columns matching the value. Used when ``type`` is
either ``static_value``, ``regexp`` or ``param_value``. Default: true</xs:documentation>
either ``static_value``, ``regexp``, ``param_value`` or ``data_table``. Default: true.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="value" type="xs:string">
Expand Down Expand Up @@ -5874,6 +5875,18 @@ from the list.</xs:documentation>
<xs:documentation xml:lang="en">Only used if ``type`` is ``attribute_value_splitter``. This is used to separate attributes and values from each other within an attribute-value pair, i.e. ``=`` if the target content is ``A=V; B=W; C=Y``. Defaults to whitespace.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="table_name" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">Only used when ``type`` is
``data_table``. The name of the data table to use.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="data_table_column" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">Only used when ``type`` is
``data_table``. The column of the data table to use (0 based index or column name).</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:complexType>
<xs:complexType name="Outputs">
<xs:annotation>
Expand Down Expand Up @@ -7926,6 +7939,7 @@ and ``bibtex`` are the only supported options.</xs:documentation>
<xs:enumeration value="add_value"/>
<xs:enumeration value="remove_value"/>
<xs:enumeration value="sort_by"/>
<xs:enumeration value="data_table"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="ActionsConditionalFilterType">
Expand Down
56 changes: 56 additions & 0 deletions lib/galaxy/tools/parameters/dynamic_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,61 @@ def filter_options(self, options, trans, other_values):
return sorted(options, key=lambda x: x[self.column], reverse=self.reverse)


class DataTableFilter(Filter):
    """
    Filters a list of options by entries present in a data table, i.e.
    option[column] is looked up in the specified data table column

    Type: data_table

    Required Attributes:

        - column: column in options to compare with
        - table_name: data table to use
        - data_table_column: data table column to use (column name or index)

    Optional Attributes:

        - keep: Keep options where option[column] is in the data table column (True)
                Discard options where option[column] is in the data table column (False)
    """

    def __init__(self, d_option, elem):
        Filter.__init__(self, d_option, elem)
        self.table_name = elem.get("table_name", None)
        assert self.table_name is not None, "Required 'table_name' attribute missing from filter"
        column = elem.get("column", None)
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index(column)
        self.data_table_column = elem.get("data_table_column", None)
        assert self.data_table_column is not None, "Required 'data_table_column' attribute missing from filter"
        self.keep = string_as_bool(elem.get("keep", "True"))

    def _get_data_table_entries(self, trans):
        """
        Return the set of values found in the configured data table column.

        The column is looked up by name first and, if there is no column of
        that name, interpreted as an integer index. Returns ``None`` if neither
        lookup works. Errors raised while resolving the data table itself are
        not handled here and propagate to the caller.
        """
        # resolve the table once; both lookups below use it
        data_table = trans.app.tool_data_tables[self.table_name]
        try:
            return {f[self.data_table_column] for f in data_table.get_named_fields_list()}
        except KeyError:
            # no column of that name, fall back to an index
            pass
        try:
            index = int(self.data_table_column)
            return {f[index] for f in data_table.get_fields()}
        except (ValueError, IndexError):
            # not an integer, or an index beyond the fields of an entry
            # NOTE(review): assumes get_fields() yields sequences - confirm
            return None

    def filter_options(self, options, trans, other_values):
        """
        Return the options kept by this filter.

        With ``keep`` set, only options whose value in ``column`` is among the
        data table entries are returned, otherwise only those that are not. If
        the data table column can not be read an error is logged and the
        options are returned unfiltered.
        """
        entries = self._get_data_table_entries(trans)
        if entries is None:
            log.error(f"could not get data from column {self.data_table_column} from data_table {self.table_name}")
            return options

        return [o for o in options if self.keep == (o[self.column] in entries)]


filter_types = dict(
data_meta=DataMetaFilter,
param_value=ParamValueFilter,
Expand All @@ -549,6 +604,7 @@ def filter_options(self, options, trans, other_values):
add_value=AdditionalValueFilter,
remove_value=RemoveValueFilter,
sort_by=SortByColumnFilter,
data_table=DataTableFilter,
)


Expand Down
42 changes: 42 additions & 0 deletions test/functional/tools/filter_data_table.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<tool id="filter_data_table" name="filter data table" version="0.1.0">
<description>Filter on datatable entries</description>
<command><![CDATA[
echo '$dynamic_select' > '$output'
]]></command>
<inputs>
<param name="dynamic_select" type="select">
<!-- initialize options from file (contains all entries that could be added to the data table) -->
<options from_url="https://usegalaxy.org/api/genomes">
<column name="name" index="0"/>
<column name="value" index="0"/>
<!-- keep only the options that are present in the data table -->
<filter type="data_table" column="value" table_name="test_fasta_indexes" data_table_column="dbkey" keep="true"/>
<validator type="no_options" message="No options available" />
</options>
<!-- <options from_file="test_file.tsv">
</options> -->
</param>
</inputs>

<outputs>
<data name="output" format="txt" />
</outputs>

<tests>
<!-- select a value that is present in the data table (and therefore kept) -->
<test expect_failure="false">
<param name="dynamic_select" value="hg19" />
<output name="output">
<assert_contents>
<has_text text="hg19"/>
</assert_contents>
</output>
</test>
<!-- selecting a value that is absent from the data table (and therefore filtered out) fails -->
<test expect_failure="true">
<param name="dynamic_select" value="ancCey1" />
</test>
</tests>
<help>
</help>
</tool>
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
<tool file="dbkey_filter_input.xml" />
<tool file="dbkey_filter_multi_input.xml" />
<tool file="dbkey_filter_collection.xml" />
<tool file="filter_data_table.xml" />
<tool file="dbkey_output_action.xml" />
<tool file="composite_output.xml" />
<tool file="composite_output_tests.xml" />
Expand Down
Loading

0 comments on commit 363ab53

Please sign in to comment.