Skip to content

Commit

Permalink
Address BEP031 macro requests (#945)
Browse files Browse the repository at this point in the history
* Allow table-specific numbers of duplicate suffixes/exts to combine.

* Only combine compressed versions of extensions.
  • Loading branch information
tsalo authored Dec 3, 2021
1 parent 79091c9 commit 7b63f8e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 23 deletions.
4 changes: 2 additions & 2 deletions src/04-modality-specific-files/10-microscopy.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The current MACRO list ".ome.btf", ".png" and ".tif[.ome]", is it possible to ha
If not, ".tif" and ".ome.tif" should be 2 separate lines but are concatenated as ".tif[.ome]"
-->
{{ MACROS___make_filename_template(datatypes=["micr"], suffixes=["TEM", "SEM", "uCT", "BF", "DF",
"PC", "DIC", "FLUO", "CONF", "PLI", "CARS", "2PE", "MPE", "SR", "NLO", "OCT", "SPIM"]) }}
"PC", "DIC", "FLUO", "CONF", "PLI", "CARS", "2PE", "MPE", "SR", "NLO", "OCT", "SPIM"], n_dupes_to_combine=3) }}

Microscopy data MUST be stored in the `micr` directory.

Expand Down Expand Up @@ -459,7 +459,7 @@ sub-<label>/
```
The current MACRO list ".jpg", ".png" and ".tif", is it possible to have one line with "<extension>" instead?
-->
{{ MACROS___make_filename_template(datatypes=["micr"], suffixes=["photo"]) }}
{{ MACROS___make_filename_template(datatypes=["micr"], suffixes=["photo"], n_dupes_to_combine=3) }}

The file `<extension>` for photos MUST be either `.jpg`, `.png` or `.tif`.

Expand Down
12 changes: 7 additions & 5 deletions tools/schemacode/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,15 +226,17 @@ def _add_entity(filename_template, entity_pattern, requirement_level):
return filename_template


def make_filename_template(schema, **kwargs):
"""Create codeblocks containing example filename patterns for a given
datatype.
def make_filename_template(schema, n_dupes_to_combine=6, **kwargs):
"""Create codeblocks containing example filename patterns for a given datatype.
Parameters
----------
schema : dict
The schema object, which is a dictionary with nested dictionaries and
lists stored within it.
n_dupes_to_combine : int
The minimum number of suffixes/extensions to combine in the template as
<suffix>/<extension>.
kwargs : dict
Keyword arguments used to filter the schema.
Example kwargs that may be used include: "suffixes", "datatypes",
Expand Down Expand Up @@ -298,7 +300,7 @@ def make_filename_template(schema, **kwargs):

# In cases of large numbers of suffixes,
# we use the "suffix" variable and expect a table later in the spec
if len(group["suffixes"]) > 5:
if len(group["suffixes"]) >= n_dupes_to_combine:
suffix = "_<suffix>"
string += suffix
strings = [string]
Expand All @@ -314,7 +316,7 @@ def make_filename_template(schema, **kwargs):
ext if ext != "*" else ".<extension>" for ext in extensions
]
extensions = utils.combine_extensions(extensions)
if len(extensions) > 5:
if len(extensions) >= n_dupes_to_combine:
# Combine exts when there are many, but keep JSON separate
if ".json" in extensions:
extensions = [".<extension>", ".json"]
Expand Down
38 changes: 22 additions & 16 deletions tools/schemacode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,35 @@ def get_schema_path():
def combine_extensions(lst):
    """Combine extensions with their compressed versions in a list.

    Valid combinations are hardcoded in the function,
    since some extensions look like compressed versions of one another, but are not.

    Parameters
    ----------
    lst : list of str
        Raw list of extensions.

    Returns
    -------
    new_lst : list of str
        List of extensions, with compressed and uncompressed versions of the same
        extension combined (for example, ".nii" and ".nii.gz" become ".nii[.gz]").
        Combined entries come first, followed by the remaining extensions in
        their original order.
    """
    COMPRESSION_EXTENSIONS = (".gz",)

    # Sets give constant-time membership tests; ``lst`` itself is still
    # iterated so that the output order stays deterministic.
    available = set(lst)
    new_lst = []
    items_to_remove = set()
    for item in lst:
        for ext in COMPRESSION_EXTENSIONS:
            if not item.endswith(ext):
                continue
            # Strip only the trailing suffix: str.replace() would also remove
            # earlier occurrences of the same text inside the extension.
            base = item[: -len(ext)]
            # An empty base means the item *is* the compression suffix.
            if base and base in available:
                new_lst.append(base + "[" + ext + "]")
                items_to_remove.update((item, base))
                # The item has been folded; no other suffix needs checking.
                break

    new_lst += [item for item in lst if item not in items_to_remove]
    return new_lst

Expand Down

0 comments on commit 7b63f8e

Please sign in to comment.