Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XML based vtk datatype #19104

Draft
wants to merge 17 commits into
base: dev
Choose a base branch
from
2 changes: 2 additions & 0 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@
<datatype extension="gmsh.msh" type="galaxy.datatypes.constructive_solid_geometry:GmshMsh" display_in_upload="true"/>
<datatype extension="gmsh.geo" type="galaxy.datatypes.constructive_solid_geometry:GmshGeo" display_in_upload="true"/>
<datatype extension="zset.geof" type="galaxy.datatypes.constructive_solid_geometry:ZsetGeof" display_in_upload="true"/>
<datatype extension="vtkxml" type="galaxy.datatypes.constructive_solid_geometry:VtkXml" display_in_upload="true"/>
<datatype extension="stl" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" display_in_upload="true" description="STL is a file format native to the stereolithography CAD software created by 3D Systems."/>
<!-- Povray script -->
<datatype extension="pov" type="galaxy.datatypes.text:Text" subclass="true" display_in_upload="true"/>
Expand Down Expand Up @@ -1033,6 +1034,7 @@
<sniffer type="galaxy.datatypes.mothur:Axes"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:PlyAscii"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:PlyBinary"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:VtkXml"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:VtkAscii"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:VtkBinary"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:NeperTess"/>
Expand Down
63 changes: 60 additions & 3 deletions lib/galaxy/datatypes/constructive_solid_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import abc
import re
from typing import (
List,
Optional,
Expand All @@ -29,6 +30,8 @@
FilePrefix,
)
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.xml import GenericXml
from xml.etree import ElementTree as ET
bernt-matthias marked this conversation as resolved.
Show resolved Hide resolved

if TYPE_CHECKING:
from io import TextIOBase
Expand Down Expand Up @@ -189,9 +192,6 @@ class Vtk:

Binary data must be placed into the file immediately after the newline
('\\n') character from the previous ASCII keyword and parameter sequence.

TODO: only legacy formats are currently supported and support for XML formats
should be added.
"""

subtype = ""
Expand Down Expand Up @@ -813,3 +813,60 @@ def get_next_line(fh):
# Discard the rest of the line
fh.readline()
return line.strip()

class VtkXml(GenericXml):
    """Datatype for VTK datasets stored in the XML-based VTK file format.

    The root element of such a document is ``<VTKFile>``; its ``type`` and
    ``version`` attributes are recorded as dataset metadata.

    Format description:
    https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html
    """

    edam_format = "edam:format_2332"
    file_ext = "vtkxml"

    # Number of characters read from the start of the file when looking for
    # the <VTKFile ...> start tag. Bounded so a large dataset (or one stored
    # on a single line) is never read into memory as a whole.
    _HEADER_PROBE_CHARS = 8192

    # The same MetadataElements are also available for legacy VTK datatypes.
    MetadataElement(name="vtk_version", default=None, desc="Vtk version", readonly=True, optional=True, visible=True)
    MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True)
    MetadataElement(name="dataset_type", default=None, desc="Dataset type", readonly=True, optional=True, visible=True)

    @staticmethod
    def _extract_attribute(text: str, name: str) -> str:
        """Return the value of the XML attribute ``name`` found in ``text``.

        The attribute name has to start at a name boundary, so looking for
        ``type`` does not match inside ``header_type``. Single- and
        double-quoted values are accepted, as is whitespace around ``=``.

        :param text: text to search, typically an element start tag
        :param name: exact attribute name to look for
        :returns: the (non-empty) attribute value, or ``"?"`` if not found
        """
        pattern = rf"""(?<![\w:.-]){re.escape(name)}\s*=\s*(?:"([^"]+)"|'([^']+)')"""
        match = re.search(pattern, text)
        if match is None:
            return "?"
        return match.group(1) or match.group(2)

    def extract_version(self, line: str) -> str:
        """Return the value of the ``version`` attribute in ``line``, or ``"?"`` if absent."""
        return self._extract_attribute(line, "version")

    def extract_type(self, line: str) -> str:
        """Return the value of the ``type`` attribute in ``line``, or ``"?"`` if absent."""
        return self._extract_attribute(line, "type")

    def set_meta(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate ``file_format``, ``vtk_version`` and ``dataset_type`` metadata.

        ``file_format`` is always ``"XML"``. The other two values are taken
        from the ``<VTKFile ...>`` start tag, which is searched for in the
        first ``_HEADER_PROBE_CHARS`` characters of the file. It therefore
        does not matter whether the tag is preceded by an XML declaration,
        comments or blank lines, or shares a line with them. Values that
        cannot be determined are set to ``"?"``.
        """
        dataset.metadata.file_format = "XML"

        # Undecodable bytes (e.g. raw appended binary data) are skipped
        # rather than raising.
        with open(dataset.get_file_name(), encoding="utf-8", errors="ignore") as handle:
            head = handle.read(self._HEADER_PROBE_CHARS)

        root_tag = ""
        start = head.find("<VTKFile")
        if start != -1:
            # Only inspect the start tag itself so that attributes of child
            # elements (e.g. <DataArray type="...">) are never picked up.
            end = head.find(">", start)
            root_tag = head[start:] if end == -1 else head[start : end + 1]

        dataset.metadata.vtk_version = self.extract_version(root_tag)
        dataset.metadata.dataset_type = self.extract_type(root_tag)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the peek and blurb text for VTK dataset files."""
        if not dataset.dataset.purged:
            dataset.peek = "VTK Dataset file"
            dataset.blurb = f"type {dataset.metadata.dataset_type} version {dataset.metadata.vtk_version}"
        else:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disk"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """Check for the key string 'VTKFile' to determine if this is a VTK dataset file.

        Delegates to ``_has_root_element_in_prefix`` with the element name
        ``VTKFile``.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('data.vtu')
        >>> VtkXml().sniff(fname)
        True
        >>> fname = get_test_fname('solid.xml')
        >>> VtkXml().sniff(fname)
        False
        """
        return self._has_root_element_in_prefix(file_prefix, "VTKFile")

41 changes: 41 additions & 0 deletions lib/galaxy/datatypes/test/data.vtu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?xml version="1.0"?>
<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian" header_type="UInt32" compressor="vtkZLibDataCompressor">
<UnstructuredGrid>
<Piece NumberOfPoints="21" NumberOfCells="21">
<PointData>
<DataArray type="Float64" Name="P_BC_FLOW" format="binary" RangeMin="-2" RangeMax="0">
AQAAAACAAACoAAAAFgAAAA==eJxjYCAKHCBAE1JPqvno4gwABYYGwQ==
</DataArray>
<DataArray type="UInt64" Name="bulk_node_ids" format="binary" RangeMin="0" RangeMax="70">
AQAAAACAAACoAAAAOwAAAA==eJwtxRECgDAAAMBJkDQomQySBk2SYNKg/v+kQXdyIfySJ2fP3r34cHTx6tObqy/fbn7c/frzAL7YAwQ=
</DataArray>
</PointData>
<CellData>
</CellData>
<Points>
<DataArray type="Float64" Name="Points" NumberOfComponents="3" format="binary" RangeMin="42.42640687119285" RangeMax="94.33981132056604">
AQAAAACAAAD4AQAAYwAAAA==eJxjYAABOwcw1RAIoeEAXVzlAIo4gx929XBxqHq4fjT1GOLo6j1xqPdEVQ+31wWHe2Di6OajqccQRzffDlU9hjiaerh5Jmjm+KGKY4QDurgnqjiGv9DVu+BQbwcXBwAoLCke
<InformationKey name="L2_NORM_RANGE" location="vtkDataArray" length="2">
<Value index="0">
42.426406871
</Value>
<Value index="1">
94.339811321
</Value>
</InformationKey>
</DataArray>
</Points>
<Cells>
<DataArray type="Int64" Name="connectivity" format="binary" RangeMin="0" RangeMax="20">
AQAAAACAAACoAAAAMQAAAA==eJwtxUECQCAAALAQRSLx/7d2sF0Wwm/y7MXRqzcnZ+8+XHy6+vLt5sfdrz8PMMgA0w==
</DataArray>
<DataArray type="Int64" Name="offsets" format="binary" RangeMin="1" RangeMax="21">
AQAAAACAAACoAAAAMgAAAA==eJwtxbcBgCAAADArFsAC8v+rDiRLxqGbPHvx6uDNuw+fjk7Ovnz78evi6s/NPzgAAOg=
</DataArray>
<DataArray type="UInt8" Name="types" format="binary" RangeMin="1" RangeMax="1">
AQAAAACAAAAVAAAACwAAAA==eJxjZMQCAAD8ABY=
</DataArray>
</Cells>
</Piece>
</UnstructuredGrid>
</VTKFile>
25 changes: 25 additions & 0 deletions lib/galaxy/datatypes/test/solid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<phase>
<type>Solid</type>
<properties>
<property>
<name>density</name>
<type>Constant</type>
<value>2300</value>
</property>
<property>
<name>thermal_conductivity</name>
<type>Constant</type>
<value>1.9</value>
</property>
<property>
<name>specific_heat_capacity</name>
<type>Constant</type>
<value>800</value>
</property>
<property>
<name>thermal_expansivity</name>
<type>Constant</type>
<value>1.7e-5</value>
</property>
</properties>
</phase>
18 changes: 18 additions & 0 deletions test/unit/data/datatypes/test_constructive_solid_geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

from galaxy.datatypes.constructive_solid_geometry import VtkXml
from .util import (
get_input_files,
MockDataset,
)

def test_vtkXml_set_meta():
    """Check the metadata that ``VtkXml.set_meta`` records for the ``data.vtu`` fixture."""
    datatype = VtkXml()
    with get_input_files("data.vtu") as fixture_paths:
        mock_dataset = MockDataset(1)
        mock_dataset.set_file_name(fixture_paths[0])

        datatype.set_meta(mock_dataset)

        recorded = mock_dataset.metadata
        assert recorded.vtk_version == "0.1"
        assert recorded.file_format == "XML"
        assert recorded.dataset_type == "UnstructuredGrid"
Loading