Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XML based vtk datatype #19104

Draft
wants to merge 17 commits into
base: dev
Choose a base branch
from
4 changes: 3 additions & 1 deletion lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@
<datatype extension="gmsh.msh" type="galaxy.datatypes.constructive_solid_geometry:GmshMsh" display_in_upload="true"/>
<datatype extension="gmsh.geo" type="galaxy.datatypes.constructive_solid_geometry:GmshGeo" display_in_upload="true"/>
<datatype extension="zset.geof" type="galaxy.datatypes.constructive_solid_geometry:ZsetGeof" display_in_upload="true"/>
<datatype extension="vtu" type="galaxy.datatypes.constructive_solid_geometry:Vtu" display_in_upload="true"/>
tStehling marked this conversation as resolved.
Show resolved Hide resolved
<datatype extension="stl" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" display_in_upload="true" description="STL is a file format native to the stereolithography CAD software created by 3D Systems."/>
<!-- Povray script -->
<datatype extension="pov" type="galaxy.datatypes.text:Text" subclass="true" display_in_upload="true"/>
Expand Down Expand Up @@ -1007,7 +1008,7 @@
<datatype extension="fits" type="galaxy.datatypes.binary:FITS" mimetype="application/octet-stream" display_in_upload="true" description="Flexible Image Transport System (FITS) used in Astronomy"/>
<datatype extension="chain" type="galaxy.datatypes.chain:Chain" display_in_upload="true" description_url="https://genome.ucsc.edu/goldenPath/help/chain.html"/>
<datatype extension="ucsc.net" type="galaxy.datatypes.chain:Net" display_in_upload="true" description_url="https://genome.ucsc.edu/goldenPath/help/net.html"/>
</registration>
</registration>
tStehling marked this conversation as resolved.
Show resolved Hide resolved
<sniffers>
<!--
The order in which Galaxy attempts to determine data types is
Expand All @@ -1033,6 +1034,7 @@
<sniffer type="galaxy.datatypes.mothur:Axes"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:PlyAscii"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:PlyBinary"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:Vtu"/>
tStehling marked this conversation as resolved.
Show resolved Hide resolved
<sniffer type="galaxy.datatypes.constructive_solid_geometry:VtkAscii"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:VtkBinary"/>
<sniffer type="galaxy.datatypes.constructive_solid_geometry:NeperTess"/>
Expand Down
60 changes: 57 additions & 3 deletions lib/galaxy/datatypes/constructive_solid_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
FilePrefix,
)
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.xml import GenericXml
from xml.etree import ElementTree as ET
bernt-matthias marked this conversation as resolved.
Show resolved Hide resolved

if TYPE_CHECKING:
from io import TextIOBase
Expand Down Expand Up @@ -189,9 +191,6 @@ class Vtk:

Binary data must be placed into the file immediately after the newline
('\\n') character from the previous ASCII keyword and parameter sequence.

TODO: only legacy formats are currently supported and support for XML formats
should be added.
"""

subtype = ""
Expand Down Expand Up @@ -813,3 +812,58 @@ def get_next_line(fh):
# Discard the rest of the line
fh.readline()
return line.strip()

class VtkXml(GenericXml):
    """XML-based VTK dataset (``<VTKFile>`` root element) and base for its sub-datatypes.

    Format reference:
    https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html
    """

    edam_data = "edam:data_3671"
    edam_format = "edam:format_3621"
    file_ext = "vtkxml"

    MetadataElement(name="vtk_version", default=None, desc="Vtk version", readonly=True, optional=True, visible=True)
    MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True)
    MetadataElement(name="dataset_type", default=None, desc="Dataset type", readonly=True, optional=True, visible=True)

    def set_meta(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate ``vtk_version``, ``file_format`` and ``dataset_type`` on ``dataset.metadata``.

        The values are read from the root element of the file:

        * ``vtk_version``  -- the root's ``version`` attribute
        * ``dataset_type`` -- the root's ``type`` attribute (e.g. ``UnstructuredGrid``)
        * ``file_format``  -- the root tag itself (``VTKFile`` for valid files)

        Any value that cannot be determined (missing attribute, malformed or
        empty XML) is recorded as the string ``"unknown"``.
        """
        vtk_version = file_format = dataset_type = "unknown"
        try:
            with open(dataset.get_file_name(), "rb") as handle:
                # Only the root element's tag/attributes are needed, so stop at
                # the first "start" event instead of building the whole tree
                # (VTK files can carry very large data arrays).
                for _event, root in ET.iterparse(handle, events=("start",)):
                    vtk_version = root.attrib.get("version", "unknown")
                    dataset_type = root.attrib.get("type", "unknown")
                    file_format = root.tag
                    break
        except ET.ParseError:
            # Best effort: leave every field at "unknown" for unparsable files.
            pass

        # Metadata belongs to the dataset, not to the (shared) datatype instance.
        dataset.metadata.vtk_version = vtk_version
        dataset.metadata.file_format = file_format
        dataset.metadata.dataset_type = dataset_type

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the peek (content preview) and blurb (short label) for VTK dataset files."""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.get_file_name())
            dataset.blurb = "VTK Dataset file"
        else:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disk"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """Return True when the root element of the XML prefix is ``VTKFile``.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('data.vtu')
        >>> VtkXml().sniff(fname)
        True
        >>> fname = get_test_fname('solid.xml')
        >>> VtkXml().sniff(fname)
        False
        """
        return self._has_root_element_in_prefix(file_prefix, "VTKFile")
41 changes: 41 additions & 0 deletions lib/galaxy/datatypes/test/data.vtu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?xml version="1.0"?>
<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian" header_type="UInt32" compressor="vtkZLibDataCompressor">
<UnstructuredGrid>
<Piece NumberOfPoints="21" NumberOfCells="21">
<PointData>
<DataArray type="Float64" Name="P_BC_FLOW" format="binary" RangeMin="-2" RangeMax="0">
AQAAAACAAACoAAAAFgAAAA==eJxjYCAKHCBAE1JPqvno4gwABYYGwQ==
</DataArray>
<DataArray type="UInt64" Name="bulk_node_ids" format="binary" RangeMin="0" RangeMax="70">
AQAAAACAAACoAAAAOwAAAA==eJwtxRECgDAAAMBJkDQomQySBk2SYNKg/v+kQXdyIfySJ2fP3r34cHTx6tObqy/fbn7c/frzAL7YAwQ=
</DataArray>
</PointData>
<CellData>
</CellData>
<Points>
<DataArray type="Float64" Name="Points" NumberOfComponents="3" format="binary" RangeMin="42.42640687119285" RangeMax="94.33981132056604">
AQAAAACAAAD4AQAAYwAAAA==eJxjYAABOwcw1RAIoeEAXVzlAIo4gx929XBxqHq4fjT1GOLo6j1xqPdEVQ+31wWHe2Di6OajqccQRzffDlU9hjiaerh5Jmjm+KGKY4QDurgnqjiGv9DVu+BQbwcXBwAoLCke
<InformationKey name="L2_NORM_RANGE" location="vtkDataArray" length="2">
<Value index="0">
42.426406871
</Value>
<Value index="1">
94.339811321
</Value>
</InformationKey>
</DataArray>
</Points>
<Cells>
<DataArray type="Int64" Name="connectivity" format="binary" RangeMin="0" RangeMax="20">
AQAAAACAAACoAAAAMQAAAA==eJwtxUECQCAAALAQRSLx/7d2sF0Wwm/y7MXRqzcnZ+8+XHy6+vLt5sfdrz8PMMgA0w==
</DataArray>
<DataArray type="Int64" Name="offsets" format="binary" RangeMin="1" RangeMax="21">
AQAAAACAAACoAAAAMgAAAA==eJwtxbcBgCAAADArFsAC8v+rDiRLxqGbPHvx6uDNuw+fjk7Ovnz78evi6s/NPzgAAOg=
</DataArray>
<DataArray type="UInt8" Name="types" format="binary" RangeMin="1" RangeMax="1">
AQAAAACAAAAVAAAACwAAAA==eJxjZMQCAAD8ABY=
</DataArray>
</Cells>
</Piece>
</UnstructuredGrid>
</VTKFile>
25 changes: 25 additions & 0 deletions lib/galaxy/datatypes/test/solid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<phase>
<type>Solid</type>
<properties>
<property>
<name>density</name>
<type>Constant</type>
<value>2300</value>
</property>
<property>
<name>thermal_conductivity</name>
<type>Constant</type>
<value>1.9</value>
</property>
<property>
<name>specific_heat_capacity</name>
<type>Constant</type>
<value>800</value>
</property>
<property>
<name>thermal_expansivity</name>
<type>Constant</type>
<value>1.7e-5</value>
</property>
</properties>
</phase>
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,4 +270,4 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
"""
Checking for keyword - '<sbml' in the first 200 lines.
"""
return file_prefix.search(SBML_MARKER)
return file_prefix.search(SBML_MARKER)
tStehling marked this conversation as resolved.
Show resolved Hide resolved