From a3919c78e76f43f465e28b4610a356c27a09746d Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 30 Oct 2019 12:38:20 +0000 Subject: [PATCH 01/11] read APBS .dx.gz generated grids --- gridData/OpenDX.py | 16 +++++++++++----- gridData/core.py | 8 ++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index c3df43b..51571af 100644 --- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -165,6 +165,7 @@ import re from six import next from six.moves import range +import gzip import warnings @@ -483,7 +484,7 @@ def write(self, filename): for component,object in self.sorted_components(): outfile.write('component "%s" value %s\n' % (component,str(object.id))) - def read(self,file): + def read(self, file, gz=False): """Read DX field from file. dx = OpenDX.field.read(dxfile) @@ -492,7 +493,7 @@ def read(self,file): """ DXfield = self p = DXParser(file) - p.parse(DXfield) + p.parse(DXfield, gz) def add(self,component,DXobj): """add a component to the field""" @@ -652,7 +653,7 @@ def __init__(self, filename): } - def parse(self,DXfield): + def parse(self, DXfield, gz=False): """Parse the dx file and construct a DX field object with component classes. 
A :class:`field` instance *DXfield* must be provided to be @@ -678,8 +679,13 @@ def parse(self,DXfield): self.currentobject = None # containers for data self.objects = [] # | self.tokens = [] # token buffer - with open(self.filename, 'r') as self.dxfile: - self.use_parser('general') # parse the whole file and populate self.objects + + if gz: + with gzip.open(self.filename, 'rt') as self.dxfile: + self.use_parser('general') # parse the whole file and populate self.objects + else: + with open(self.filename, 'r') as self.dxfile: + self.use_parser('general') # parse the whole file and populate self.objects # assemble field from objects for o in self.objects: diff --git a/gridData/core.py b/gridData/core.py index 4440833..cfab29d 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -125,6 +125,7 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, self._loaders = { 'CCP4': self._load_cpp4, 'DX': self._load_dx, + 'DXGZ': self._load_dxgz, 'PLT': self._load_plt, 'PKL': self._load_python, 'PICKLE': self._load_python, # compatibility @@ -439,6 +440,13 @@ def _load_dx(self, filename): grid, edges = dx.histogramdd() self.__init__(grid=grid, edges=edges, metadata=self.metadata) + def _load_dxgz(self, filename): + """Initializes Grid from a OpenDX file.""" + dx = OpenDX.field(0) + dx.read(filename, gz=True) + grid, edges = dx.histogramdd() + self.__init__(grid=grid, edges=edges, metadata=self.metadata) + def _load_plt(self, filename): """Initialize Grid from gOpenMol plt file.""" g = gOpenMol.Plt() From 40abc1ba2384c29e8441099753e3e37c093f6520 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Tue, 5 Nov 2019 17:56:41 +0000 Subject: [PATCH 02/11] add DXGZ export capability and tests --- AUTHORS | 1 + gridData/core.py | 25 +++++++--- gridData/tests/datafiles/__init__.py | 3 +- gridData/tests/datafiles/test.dx.gz | Bin 0 -> 499 bytes gridData/tests/test_dx.py | 66 +++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 
gridData/tests/datafiles/test.dx.gz diff --git a/AUTHORS b/AUTHORS index 8d55219..6878b7b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -17,3 +17,4 @@ Contributors: * Dominik Mierzejewski * Tyler Luchko * Giacomo Fiorin +* Eloy Félix diff --git a/gridData/core.py b/gridData/core.py index cfab29d..1c43f12 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -32,6 +32,7 @@ import os import errno +import gzip import numpy @@ -118,6 +119,7 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, # file formats are guess from extension == lower case key self._exporters = { 'DX': self._export_dx, + 'DXGZ': self._export_dxgz, 'PKL': self._export_python, 'PICKLE': self._export_python, # compatibility 'PYTHON': self._export_python, # compatibility @@ -433,19 +435,16 @@ def _load_cpp4(self, filename): grid, edges = ccp4.histogramdd() self.__init__(grid=grid, edges=edges, metadata=self.metadata) - def _load_dx(self, filename): + def _load_dx(self, filename, gz=False): """Initializes Grid from a OpenDX file.""" dx = OpenDX.field(0) - dx.read(filename) + dx.read(filename, gz) grid, edges = dx.histogramdd() self.__init__(grid=grid, edges=edges, metadata=self.metadata) def _load_dxgz(self, filename): - """Initializes Grid from a OpenDX file.""" - dx = OpenDX.field(0) - dx.read(filename, gz=True) - grid, edges = dx.histogramdd() - self.__init__(grid=grid, edges=edges, metadata=self.metadata) + """Initializes Grid from a gzipped OpenDX file.""" + self._load_dx(filename=filename, gz=True) def _load_plt(self, filename): """Initialize Grid from gOpenMol plt file.""" @@ -554,6 +553,18 @@ def _export_dx(self, filename, type=None, typequote='"', **kwargs): dx = OpenDX.field('density', components=components, comments=comments) dx.write(filename) + def _export_dxgz(self, filename, type=None, typequote='"', **kwargs): + self._export_dx(filename, type, typequote, **kwargs) + root, ext = os.path.splitext(filename) + filename = root + '.dx' + + with open(filename, 'rb') as in_file: + 
gz_filename = filename + '.gz' + with gzip.open(gz_filename, 'wb') as out_file: + for l in in_file: + out_file.write(l) + os.remove(filename) + def save(self, filename): """Save a grid object to .pickle diff --git a/gridData/tests/datafiles/__init__.py b/gridData/tests/datafiles/__init__.py index 4e3f21f..dd0f65b 100644 --- a/gridData/tests/datafiles/__init__.py +++ b/gridData/tests/datafiles/__init__.py @@ -2,9 +2,10 @@ from pkg_resources import resource_filename -__all__ = ["DX", "CCP4", "gOpenMol"] +__all__ = ["DX", "DXGZ", "CCP4", "gOpenMol"] DX = resource_filename(__name__, 'test.dx') +DXGZ = resource_filename(__name__, 'test.dx.gz') CCP4 = resource_filename(__name__, 'test.ccp4') # from http://www.ebi.ac.uk/pdbe/coordinates/files/1jzv.ccp4 # (see issue #57) diff --git a/gridData/tests/datafiles/test.dx.gz b/gridData/tests/datafiles/test.dx.gz new file mode 100644 index 0000000000000000000000000000000000000000..7795203d6669d480c42b74c2a361f62660d3368c GIT binary patch literal 499 zcmVElbKF@zPfb%0890^68mcopz!@ zpZ4B0efIhx;oRnZ@#SrCVx_PP2#0`eiBh5jWq{5IXA}}02qS?Mr^!y~;0!v;u9Y=D zSrnAN+GqY7R&-(n>saClAIwDiK5WPB^N&eJ49s{Av|4yVPb~CHxKWvA8vq8z`CBU^ zv>svDf=V#Ckb;CV23>1Cfwr~_PVPQg!sAr@`mq2x?A)b^>TDm;gEdguwnal8O4Rm= zp)97A!TZRTKd~anq0++p$?&yJruBgY`|w8AsfN4UXGxxg2M$Sg33holhml0hbAfy* zWq$_?;#$c6i}(k+xZbZ^SC$7;-Sm9{A7=37jA-l-VrlzQV+>B1N60`CwjLfphO@1$ z=2`h@+4TCndP(L6ZEhp~liAJjOV~HI5JXOuJ%vWFi^22KG!S18FkX6(=RYo Date: Tue, 5 Nov 2019 23:55:10 +0000 Subject: [PATCH 03/11] code review fixes --- gridData/OpenDX.py | 12 ++-- gridData/core.py | 34 +++++------- gridData/tests/datafiles/__init__.py | 3 +- gridData/tests/test_dx.py | 82 +++------------------------- 4 files changed, 27 insertions(+), 104 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index 51571af..f7cb0ef 100644 --- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -484,7 +484,7 @@ def write(self, filename): for component,object in self.sorted_components(): outfile.write('component "%s" value %s\n' 
% (component,str(object.id))) - def read(self, file, gz=False): + def read(self, file): """Read DX field from file. dx = OpenDX.field.read(dxfile) @@ -493,7 +493,7 @@ def read(self, file, gz=False): """ DXfield = self p = DXParser(file) - p.parse(DXfield, gz) + p.parse(DXfield) def add(self,component,DXobj): """add a component to the field""" @@ -653,7 +653,7 @@ def __init__(self, filename): } - def parse(self, DXfield, gz=False): + def parse(self, DXfield): """Parse the dx file and construct a DX field object with component classes. A :class:`field` instance *DXfield* must be provided to be @@ -680,12 +680,12 @@ def parse(self, DXfield, gz=False): self.objects = [] # | self.tokens = [] # token buffer - if gz: + if self.filename.endswith('.gz'): with gzip.open(self.filename, 'rt') as self.dxfile: - self.use_parser('general') # parse the whole file and populate self.objects + self.use_parser('general') else: with open(self.filename, 'r') as self.dxfile: - self.use_parser('general') # parse the whole file and populate self.objects + self.use_parser('general') # parse the whole file and populate self.objects # assemble field from objects for o in self.objects: diff --git a/gridData/core.py b/gridData/core.py index 1c43f12..b66ff13 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -119,7 +119,6 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, # file formats are guess from extension == lower case key self._exporters = { 'DX': self._export_dx, - 'DXGZ': self._export_dxgz, 'PKL': self._export_python, 'PICKLE': self._export_python, # compatibility 'PYTHON': self._export_python, # compatibility @@ -127,7 +126,6 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, self._loaders = { 'CCP4': self._load_cpp4, 'DX': self._load_dx, - 'DXGZ': self._load_dxgz, 'PLT': self._load_plt, 'PKL': self._load_python, 'PICKLE': self._load_python, # compatibility @@ -383,7 +381,11 @@ def _guess_format(self, filename, file_format=None, export=True): 
else: available = self._loaders if file_format is None: - file_format = os.path.splitext(filename)[1][1:] + splitted = os.path.splitext(filename) + if splitted[1][1:] in ('gz', ): + file_format = os.path.splitext(splitted[0])[1][1:] + else: + file_format = splitted[1][1:] file_format = file_format.upper() if not file_format: file_format = self.default_format @@ -435,17 +437,13 @@ def _load_cpp4(self, filename): grid, edges = ccp4.histogramdd() self.__init__(grid=grid, edges=edges, metadata=self.metadata) - def _load_dx(self, filename, gz=False): + def _load_dx(self, filename): """Initializes Grid from a OpenDX file.""" dx = OpenDX.field(0) - dx.read(filename, gz) + dx.read(filename) grid, edges = dx.histogramdd() self.__init__(grid=grid, edges=edges, metadata=self.metadata) - def _load_dxgz(self, filename): - """Initializes Grid from a gzipped OpenDX file.""" - self._load_dx(filename=filename, gz=True) - def _load_plt(self, filename): """Initialize Grid from gOpenMol plt file.""" g = gOpenMol.Plt() @@ -552,18 +550,12 @@ def _export_dx(self, filename, type=None, typequote='"', **kwargs): ) dx = OpenDX.field('density', components=components, comments=comments) dx.write(filename) - - def _export_dxgz(self, filename, type=None, typequote='"', **kwargs): - self._export_dx(filename, type, typequote, **kwargs) - root, ext = os.path.splitext(filename) - filename = root + '.dx' - - with open(filename, 'rb') as in_file: - gz_filename = filename + '.gz' - with gzip.open(gz_filename, 'wb') as out_file: - for l in in_file: - out_file.write(l) - os.remove(filename) + if ext == '.gz': + with open(filename, 'rb') as in_file: + with gzip.open(root + ext, 'wb') as out_file: + for l in in_file: + out_file.write(l) + os.remove(filename) def save(self, filename): """Save a grid object to .pickle diff --git a/gridData/tests/datafiles/__init__.py b/gridData/tests/datafiles/__init__.py index dd0f65b..4e3f21f 100644 --- a/gridData/tests/datafiles/__init__.py +++ 
b/gridData/tests/datafiles/__init__.py @@ -2,10 +2,9 @@ from pkg_resources import resource_filename -__all__ = ["DX", "DXGZ", "CCP4", "gOpenMol"] +__all__ = ["DX", "CCP4", "gOpenMol"] DX = resource_filename(__name__, 'test.dx') -DXGZ = resource_filename(__name__, 'test.dx.gz') CCP4 = resource_filename(__name__, 'test.ccp4') # from http://www.ebi.ac.uk/pdbe/coordinates/files/1jzv.ccp4 # (see issue #57) diff --git a/gridData/tests/test_dx.py b/gridData/tests/test_dx.py index 5aea8b3..a169928 100644 --- a/gridData/tests/test_dx.py +++ b/gridData/tests/test_dx.py @@ -10,19 +10,9 @@ from . import datafiles -def test_read_dx(): - g = Grid(datafiles.DX) - POINTS = 8 - ref = np.ones(POINTS) - ref[4] = 1e-6 - ref[5] = -1e+6 - assert_equal(g.grid.flat, ref) - assert_equal(g.grid.size, POINTS) - assert_equal(g.delta, np.ones(3)) - assert_equal(g.origin, np.array([20.1, 3., -10.])) - -def test_read_dxgz(): - g = Grid(datafiles.DXGZ, file_format='DXGZ') +@pytest.mark.parametrize("infile", [datafiles.DX, datafiles.DX+'.gz']) +def test_read_dx(infile): + g = Grid(infile) POINTS = 8 ref = np.ones(POINTS) ref[4] = 1e-6 @@ -32,6 +22,7 @@ def test_read_dxgz(): assert_equal(g.delta, np.ones(3)) assert_equal(g.origin, np.array([20.1, 3., -10.])) +@pytest.mark.parametrize("outfile", ["grid.dx", "grid.dx.gz"]) @pytest.mark.parametrize("nptype,dxtype", [ ("float16", "float"), ("float32", "float"), @@ -45,7 +36,7 @@ def test_read_dxgz(): ("int8", "signed byte"), ("uint8", "byte"), ]) -def test_write_dx(tmpdir, nptype, dxtype, counts=100, ndim=3): +def test_write_dx(tmpdir, nptype, dxtype, outfile, counts=100, ndim=3): # conversion from numpy array to DX file h, edges = np.histogramdd(np.random.random((counts, ndim)), bins=10) @@ -57,7 +48,6 @@ def test_write_dx(tmpdir, nptype, dxtype, counts=100, ndim=3): assert_equal(g.grid.sum(), counts) with tmpdir.as_cwd(): - outfile = "grid.dx" g.export(outfile) g2 = Grid(outfile) @@ -76,54 +66,10 @@ def test_write_dx(tmpdir, nptype, dxtype, 
counts=100, ndim=3): assert_equal(out_dxtype, dxtype) -@pytest.mark.parametrize("nptype,dxtype", [ - ("float16", "float"), - ("float32", "float"), - ("float64", "double"), - ("int64", "int"), - ("int32", "int"), - ("uint32", "unsigned int"), - ("uint64", "unsigned int"), - ("int16", "short"), - ("uint16", "unsigned short"), - ("int8", "signed byte"), - ("uint8", "byte"), -]) -def test_write_dxgz(tmpdir, nptype, dxtype, counts=100, ndim=3): - # conversion from numpy array to DXGZ file - - h, edges = np.histogramdd(np.random.random((counts, ndim)), bins=10) - g = Grid(h, edges) - - # hack the grid to be a different dtype - g.grid = g.grid.astype(nptype) - - assert_equal(g.grid.sum(), counts) - - with tmpdir.as_cwd(): - outfile = "grid" - g.export(outfile, file_format='DXGZ') - outfile += ".dx.gz" - g2 = Grid(outfile, file_format='DXGZ') - - # check that dxtype was written - dx = gridData.OpenDX.field(0) - dx.read(outfile, gz=True) - data = dx.components['data'] - out_dxtype = data.type - - assert_almost_equal(g.grid, g2.grid, - err_msg="written grid does not match original") - assert_almost_equal( - g.delta, g2.delta, - decimal=6, - err_msg="deltas of written grid do not match original") - - assert_equal(out_dxtype, dxtype) - +@pytest.mark.parametrize("outfile", ["grid.dx", "grid.dx.gz"]) @pytest.mark.parametrize('nptype', ("complex64", "complex128", "bool_")) @pytest.mark.filterwarnings("ignore:array dtype.name =") -def test_write_dx_ValueError(tmpdir, nptype, counts=100, ndim=3): +def test_write_dx_ValueError(tmpdir, nptype, outfile, counts=100, ndim=3): h, edges = np.histogramdd(np.random.random((counts, ndim)), bins=10) g = Grid(h, edges) @@ -132,19 +78,5 @@ def test_write_dx_ValueError(tmpdir, nptype, counts=100, ndim=3): with pytest.raises(ValueError): with tmpdir.as_cwd(): - outfile = "grid.dx" g.export(outfile) -@pytest.mark.parametrize('nptype', ("complex64", "complex128", "bool_")) -@pytest.mark.filterwarnings("ignore:array dtype.name =") -def 
test_write_dxgz_ValueError(tmpdir, nptype, counts=100, ndim=3): - h, edges = np.histogramdd(np.random.random((counts, ndim)), bins=10) - g = Grid(h, edges) - - # hack the grid to be a different dtype - g.grid = g.grid.astype(nptype) - - with pytest.raises(ValueError): - with tmpdir.as_cwd(): - outfile = "grid" - g.export(outfile, file_format='DXGZ') From e838aac6c8118f0b9d8354628c175dd30f1dba3e Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 6 Nov 2019 00:05:19 +0000 Subject: [PATCH 04/11] update CHANGELOG --- CHANGELOG | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index e75bd5b..5e24a5f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,15 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) ------------------------------------------------------------------------------ +11/06/2019 eloyfelix, orbeckst + + * 0.6.0 + + Changes + + * Allow parsing/writting gzipped DX files + + 05/16/2019 giacomofiorin, orbeckst * 0.5.0 From 5ab7f2ec8fefd2af54e8597b10c4d8c5dcb23b43 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 6 Nov 2019 00:05:39 +0000 Subject: [PATCH 05/11] update CHANGELOG --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 5e24a5f..c8a5ea8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,7 +17,7 @@ The rules for this file: * 0.6.0 - Changes + Enhancements * Allow parsing/writting gzipped DX files From 1989629b49b3b672cdd012790f1370241065c631 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 6 Nov 2019 00:13:13 +0000 Subject: [PATCH 06/11] keep DXGZ datafile... 
--- gridData/tests/datafiles/__init__.py | 1 + gridData/tests/test_dx.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gridData/tests/datafiles/__init__.py b/gridData/tests/datafiles/__init__.py index 4e3f21f..e38acf1 100644 --- a/gridData/tests/datafiles/__init__.py +++ b/gridData/tests/datafiles/__init__.py @@ -5,6 +5,7 @@ __all__ = ["DX", "CCP4", "gOpenMol"] DX = resource_filename(__name__, 'test.dx') +DXGZ = resource_filename(__name__, 'test.dx.gz') CCP4 = resource_filename(__name__, 'test.ccp4') # from http://www.ebi.ac.uk/pdbe/coordinates/files/1jzv.ccp4 # (see issue #57) diff --git a/gridData/tests/test_dx.py b/gridData/tests/test_dx.py index a169928..35fb1a4 100644 --- a/gridData/tests/test_dx.py +++ b/gridData/tests/test_dx.py @@ -10,7 +10,7 @@ from . import datafiles -@pytest.mark.parametrize("infile", [datafiles.DX, datafiles.DX+'.gz']) +@pytest.mark.parametrize("infile", [datafiles.DX, datafiles.DXGZ]) def test_read_dx(infile): g = Grid(infile) POINTS = 8 From cbdbc367660fc02b5e0ce0c7938a192495d11f83 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 6 Nov 2019 11:17:00 +0000 Subject: [PATCH 07/11] fixed CHANGELOG, compressed writing moved to dx.write(), added DXGZ datafile --- CHANGELOG | 4 +-- gridData/OpenDX.py | 62 ++++++++++++++++++++++++++++++++++++---------- gridData/core.py | 8 ++---- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c8a5ea8..2c2a4ed 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,13 +13,13 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) ------------------------------------------------------------------------------ -11/06/2019 eloyfelix, orbeckst +??/??/2019 eloyfelix * 0.6.0 Enhancements - * Allow parsing/writting gzipped DX files + * Allow parsing/writing gzipped DX files 05/16/2019 giacomofiorin, orbeckst diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index f7cb0ef..93f9d4e 100644 --- a/gridData/OpenDX.py +++ 
b/gridData/OpenDX.py @@ -166,6 +166,7 @@ from six import next from six.moves import range import gzip +import bz2 import warnings @@ -185,8 +186,11 @@ def write(self,file,optstring="",quote=False): # Only use a *single* space between tokens; both chimera's and pymol's DX parser # does not properly implement the OpenDX specs and produces garbage with multiple # spaces. (Chimera 1.4.1, PyMOL 1.3) - file.write('object '+classid+' class '+str(self.name)+' '+\ - optstring+'\n') + to_write = 'object '+classid+' class '+str(self.name)+' '+\ + optstring+'\n' + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + file.write(to_write) def read(self,file): raise NotImplementedError('Reading is currently not supported.') @@ -229,11 +233,18 @@ def __init__(self,classid,shape=None,origin=None,delta=None,**kwargs): raise NotImplementedError('Only regularly spaced grids allowed, ' 'not delta={}'.format(self.delta)) def write(self,file): - DXclass.write(self,file, - ('counts '+self.ndformat(' %d')) % tuple(self.shape)) - file.write('origin %f %f %f\n' % tuple(self.origin)) + to_write = ('counts '+self.ndformat(' %d')) % tuple(self.shape) + DXclass.write(self, file, to_write) + to_write = 'origin %f %f %f\n' % tuple(self.origin) + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + file.write(to_write) for delta in self.delta: - file.write(('delta '+self.ndformat(' %f')+'\n') % tuple(delta)) + to_write = ('delta '+self.ndformat(' %f')+'\n') % tuple(delta) + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + file.write(to_write) + def edges(self): """Edges of the grid cells, origin at centre of 0,0,..,0 grid cell. 
@@ -386,12 +397,24 @@ def write(self, file): while 1: try: for i in range(values_per_line): - file.write(fmt_string.format(next(values)) + "\t") - file.write('\n') + to_write = fmt_string.format(next(values)) + "\t" + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + file.write(to_write) + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + file.write(b'\n') + else: + file.write('\n') except StopIteration: - file.write('\n') + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + file.write(b'\n') + else: + file.write('\n') break - file.write('attribute "dep" string "positions"\n') + to_write = 'attribute "dep" string "positions"\n' + if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + file.write(to_write) class field(DXclass): """OpenDX container class @@ -460,6 +483,13 @@ def __init__(self,classid='0',components=None,comments=None): self.components = components self.comments= comments + def _openfile(self, filename): + """Returns a regular or gz file descriptor""" + if filename.endswith('.gz'): + return gzip.open(filename, 'wb') + else: + return open(filename, 'w') + def write(self, filename): """Write the complete dx object to the file. 
@@ -472,17 +502,23 @@ def write(self, filename): """ # comments (VMD chokes on lines of len > 80, so truncate) maxcol = 80 - with open(str(filename), 'w') as outfile: + with self._openfile(str(filename)) as outfile: for line in self.comments: comment = '# '+str(line) - outfile.write(comment[:maxcol]+'\n') + to_write = comment[:maxcol]+'\n' + if isinstance(outfile, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + outfile.write(to_write) # each individual object for component,object in self.sorted_components(): object.write(outfile) # the field object itself DXclass.write(self,outfile,quote=True) for component,object in self.sorted_components(): - outfile.write('component "%s" value %s\n' % (component,str(object.id))) + to_write = 'component "%s" value %s\n' % (component,str(object.id)) + if isinstance(outfile, (gzip.GzipFile, bz2.BZ2File)): + to_write = to_write.encode() + outfile.write(to_write) def read(self, file): """Read DX field from file. diff --git a/gridData/core.py b/gridData/core.py index b66ff13..20ada11 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -549,13 +549,9 @@ def _export_dx(self, filename, type=None, typequote='"', **kwargs): data=OpenDX.array(3, self.grid, type=type, typequote=typequote), ) dx = OpenDX.field('density', components=components, comments=comments) - dx.write(filename) if ext == '.gz': - with open(filename, 'rb') as in_file: - with gzip.open(root + ext, 'wb') as out_file: - for l in in_file: - out_file.write(l) - os.remove(filename) + filename = root + ext + dx.write(filename) def save(self, filename): """Save a grid object to .pickle From fbfd0b8843767e394cdc6aee9890acd79a5c2a10 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Wed, 6 Nov 2019 20:51:04 +0000 Subject: [PATCH 08/11] add write_line function --- gridData/OpenDX.py | 70 ++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index 93f9d4e..2870cf4 100644 
--- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -166,7 +166,6 @@ from six import next from six.moves import range import gzip -import bz2 import warnings @@ -179,19 +178,24 @@ def __init__(self,classid): self.component = None # component type self.D = None # dimensions - def write(self,file,optstring="",quote=False): + def write(self, file, optstring="", quote=False): """write the 'object' line; additional args are packed in string""" classid = str(self.id) if quote: classid = '"'+classid+'"' # Only use a *single* space between tokens; both chimera's and pymol's DX parser # does not properly implement the OpenDX specs and produces garbage with multiple # spaces. (Chimera 1.4.1, PyMOL 1.3) - to_write = 'object '+classid+' class '+str(self.name)+' '+\ - optstring+'\n' - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): + to_write = 'object '+classid+' class '+str(self.name)+' '+optstring+'\n' + if isinstance(file, gzip.GzipFile): to_write = to_write.encode() file.write(to_write) + def write_line(self, file, line="", quote=False): + """write a line to the file""" + if isinstance(file, gzip.GzipFile): + line = line.encode() + file.write(line) + def read(self,file): raise NotImplementedError('Reading is currently not supported.') @@ -232,18 +236,14 @@ def __init__(self,classid,shape=None,origin=None,delta=None,**kwargs): # anything more complicated raise NotImplementedError('Only regularly spaced grids allowed, ' 'not delta={}'.format(self.delta)) - def write(self,file): - to_write = ('counts '+self.ndformat(' %d')) % tuple(self.shape) - DXclass.write(self, file, to_write) - to_write = 'origin %f %f %f\n' % tuple(self.origin) - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - to_write = to_write.encode() - file.write(to_write) + def write(self, file): + DXclass.write(self, file, ('counts '+self.ndformat(' %d')) % + tuple(self.shape)) + DXclass.write_line(self, file, 'origin %f %f %f\n' % + tuple(self.origin)) for delta in self.delta: - to_write = ('delta 
'+self.ndformat(' %f')+'\n') % tuple(delta) - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - to_write = to_write.encode() - file.write(to_write) + DXclass.write_line( + self, file, ('delta '+self.ndformat(' %f')+'\n') % tuple(delta)) def edges(self): """Edges of the grid cells, origin at centre of 0,0,..,0 grid cell. @@ -397,24 +397,13 @@ def write(self, file): while 1: try: for i in range(values_per_line): - to_write = fmt_string.format(next(values)) + "\t" - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - to_write = to_write.encode() - file.write(to_write) - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - file.write(b'\n') - else: - file.write('\n') + DXclass.write_line( + self, file, fmt_string.format(next(values)) + "\t") + DXclass.write_line(self, file, '\n') except StopIteration: - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - file.write(b'\n') - else: - file.write('\n') + DXclass.write_line(self, file, '\n') break - to_write = 'attribute "dep" string "positions"\n' - if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): - to_write = to_write.encode() - file.write(to_write) + DXclass.write_line(self, file, 'attribute "dep" string "positions"\n') class field(DXclass): """OpenDX container class @@ -505,20 +494,15 @@ def write(self, filename): with self._openfile(str(filename)) as outfile: for line in self.comments: comment = '# '+str(line) - to_write = comment[:maxcol]+'\n' - if isinstance(outfile, (gzip.GzipFile, bz2.BZ2File)): - to_write = to_write.encode() - outfile.write(to_write) + DXclass.write_line(self, outfile, comment[:maxcol]+'\n') # each individual object - for component,object in self.sorted_components(): + for component, object in self.sorted_components(): object.write(outfile) # the field object itself - DXclass.write(self,outfile,quote=True) - for component,object in self.sorted_components(): - to_write = 'component "%s" value %s\n' % (component,str(object.id)) - if isinstance(outfile, (gzip.GzipFile, bz2.BZ2File)): - 
to_write = to_write.encode() - outfile.write(to_write) + DXclass.write(self, outfile, quote=True) + for component, object in self.sorted_components(): + DXclass.write_line(self, outfile, 'component "%s" value %s\n' % ( + component, str(object.id))) def read(self, file): """Read DX field from file. From 32b6e2486fa1e1f8f136e6d0d08281b297b6127f Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Thu, 7 Nov 2019 09:49:09 +0000 Subject: [PATCH 09/11] some cleanup --- gridData/OpenDX.py | 71 +++++++++++++++++++++++----------------------- gridData/core.py | 1 - setup.py | 4 +-- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index 2870cf4..b4a4805 100644 --- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -178,7 +178,7 @@ def __init__(self,classid): self.component = None # component type self.D = None # dimensions - def write(self, file, optstring="", quote=False): + def write(self, stream, optstring="", quote=False): """write the 'object' line; additional args are packed in string""" classid = str(self.id) if quote: classid = '"'+classid+'"' @@ -186,17 +186,18 @@ def write(self, file, optstring="", quote=False): # does not properly implement the OpenDX specs and produces garbage with multiple # spaces. 
(Chimera 1.4.1, PyMOL 1.3) to_write = 'object '+classid+' class '+str(self.name)+' '+optstring+'\n' - if isinstance(file, gzip.GzipFile): + if isinstance(stream, gzip.GzipFile): to_write = to_write.encode() - file.write(to_write) + stream.write(to_write) - def write_line(self, file, line="", quote=False): + @staticmethod + def _write_line(stream, line="", quote=False): """write a line to the file""" - if isinstance(file, gzip.GzipFile): + if isinstance(stream, gzip.GzipFile): line = line.encode() - file.write(line) + stream.write(line) - def read(self,file): + def read(self, stream): raise NotImplementedError('Reading is currently not supported.') def ndformat(self,s): @@ -236,14 +237,13 @@ def __init__(self,classid,shape=None,origin=None,delta=None,**kwargs): # anything more complicated raise NotImplementedError('Only regularly spaced grids allowed, ' 'not delta={}'.format(self.delta)) - def write(self, file): - DXclass.write(self, file, ('counts '+self.ndformat(' %d')) % - tuple(self.shape)) - DXclass.write_line(self, file, 'origin %f %f %f\n' % - tuple(self.origin)) + def write(self, stream): + super(gridpositions, self).write( + stream, ('counts '+self.ndformat(' %d')) % tuple(self.shape)) + self._write_line(stream, 'origin %f %f %f\n' % tuple(self.origin)) for delta in self.delta: - DXclass.write_line( - self, file, ('delta '+self.ndformat(' %f')+'\n') % tuple(delta)) + self._write_line( + stream, ('delta '+self.ndformat(' %f')+'\n') % tuple(delta)) def edges(self): """Edges of the grid cells, origin at centre of 0,0,..,0 grid cell. 
@@ -263,9 +263,11 @@ def __init__(self,classid,shape=None,**kwargs): self.name = 'gridconnections' self.component = 'connections' self.shape = numpy.asarray(shape) # D dimensional shape - def write(self,file): - DXclass.write(self,file, - ('counts '+self.ndformat(' %d')) % tuple(self.shape)) + + def write(self, stream): + super(gridconnections, self).write( + stream, ('counts '+self.ndformat(' %d')) % tuple(self.shape)) + class array(DXclass): """OpenDX array class. @@ -362,12 +364,12 @@ def __init__(self, classid, array=None, type=None, typequote='"', self.type = type self.typequote = typequote - def write(self, file): + def write(self, stream): """Write the *class array* section. Parameters ---------- - file : file + stream : stream Raises ------ @@ -382,9 +384,9 @@ def write(self, file): "Use the type= keyword argument.").format( self.type, list(self.dx_types.keys()))) typelabel = (self.typequote+self.type+self.typequote) - DXclass.write(self,file, - 'type {0} rank 0 items {1} data follows'.format( - typelabel, self.array.size)) + super(array, self).write(stream, 'type {0} rank 0 items {1} data follows'.format( + typelabel, self.array.size)) + # grid data, serialized as a C array (z fastest varying) # (flat iterator is equivalent to: for x: for y: for z: grid[x,y,z]) # VMD's DX reader requires exactly 3 values per line @@ -397,13 +399,12 @@ def write(self, file): while 1: try: for i in range(values_per_line): - DXclass.write_line( - self, file, fmt_string.format(next(values)) + "\t") - DXclass.write_line(self, file, '\n') + self._write_line(stream, fmt_string.format(next(values)) + "\t") + self._write_line(stream, '\n') except StopIteration: - DXclass.write_line(self, file, '\n') + self._write_line(stream, '\n') break - DXclass.write_line(self, file, 'attribute "dep" string "positions"\n') + self._write_line(stream, 'attribute "dep" string "positions"\n') class field(DXclass): """OpenDX container class @@ -472,8 +473,8 @@ def 
__init__(self,classid='0',components=None,comments=None): self.components = components self.comments= comments - def _openfile(self, filename): - """Returns a regular or gz file descriptor""" + def _openfile_writing(self, filename): + """Returns a regular or gz file stream for writing""" if filename.endswith('.gz'): return gzip.open(filename, 'wb') else: @@ -491,20 +492,20 @@ def write(self, filename): """ # comments (VMD chokes on lines of len > 80, so truncate) maxcol = 80 - with self._openfile(str(filename)) as outfile: + with self._openfile_writing(str(filename)) as outfile: for line in self.comments: comment = '# '+str(line) - DXclass.write_line(self, outfile, comment[:maxcol]+'\n') + self._write_line(outfile, comment[:maxcol]+'\n') # each individual object for component, object in self.sorted_components(): object.write(outfile) # the field object itself - DXclass.write(self, outfile, quote=True) + super(field, self).write(outfile, quote=True) for component, object in self.sorted_components(): - DXclass.write_line(self, outfile, 'component "%s" value %s\n' % ( + self._write_line(outfile, 'component "%s" value %s\n' % ( component, str(object.id))) - def read(self, file): + def read(self, stream): """Read DX field from file. dx = OpenDX.field.read(dxfile) @@ -512,7 +513,7 @@ def read(self, file): The classid is discarded and replaced with the one from the file. 
""" DXfield = self - p = DXParser(file) + p = DXParser(stream) p.parse(DXfield) def add(self,component,DXobj): diff --git a/gridData/core.py b/gridData/core.py index 20ada11..72a019c 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -32,7 +32,6 @@ import os import errno -import gzip import numpy diff --git a/setup.py b/setup.py index 6226115..9a5c100 100644 --- a/setup.py +++ b/setup.py @@ -38,8 +38,8 @@ 'Topic :: Software Development :: Libraries :: Python Modules', ], packages=find_packages(exclude=[]), - package_data={'gridData': ['tests/datafiles/*.dx', 'tests/datafiles/*.ccp4', - 'tests/datafiles/*.plt']}, + package_data={'gridData': ['tests/datafiles/*.dx', 'tests/datafiles/*.dx.gz', + 'tests/datafiles/*.ccp4', 'tests/datafiles/*.plt']}, install_requires=['numpy>=1.0.3', 'six', 'scipy'], tests_require=['pytest', 'numpy'], zip_safe=True, From 0f4b6c75f248f57ab864e470b720a5919658c33c Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Thu, 7 Nov 2019 11:18:28 +0000 Subject: [PATCH 10/11] some cleanup --- gridData/OpenDX.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index b4a4805..f48b362 100644 --- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -186,9 +186,7 @@ def write(self, stream, optstring="", quote=False): # does not properly implement the OpenDX specs and produces garbage with multiple # spaces. 
(Chimera 1.4.1, PyMOL 1.3) to_write = 'object '+classid+' class '+str(self.name)+' '+optstring+'\n' - if isinstance(stream, gzip.GzipFile): - to_write = to_write.encode() - stream.write(to_write) + self._write_line(stream, to_write) @staticmethod def _write_line(stream, line="", quote=False): From 5dbd3e2fde75ca85104dcf78c5587bcb79d04b62 Mon Sep 17 00:00:00 2001 From: Eloy Felix Date: Thu, 7 Nov 2019 11:48:50 +0000 Subject: [PATCH 11/11] some cleanup --- gridData/OpenDX.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridData/OpenDX.py b/gridData/OpenDX.py index f48b362..9333445 100644 --- a/gridData/OpenDX.py +++ b/gridData/OpenDX.py @@ -701,10 +701,10 @@ def parse(self, DXfield): if self.filename.endswith('.gz'): with gzip.open(self.filename, 'rt') as self.dxfile: - self.use_parser('general') + self.use_parser('general') else: with open(self.filename, 'r') as self.dxfile: - self.use_parser('general') # parse the whole file and populate self.objects + self.use_parser('general') # parse the whole file and populate self.objects # assemble field from objects for o in self.objects: