From 5b5278390f72e5953eab6e67929881214081299a Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 12 Jan 2016 12:29:15 +0000 Subject: [PATCH 1/3] Refactored MOL2 encoding slightly Uses TopologyGroups better --- package/MDAnalysis/coordinates/MOL2.py | 60 ++++++++++++++------------ 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/package/MDAnalysis/coordinates/MOL2.py b/package/MDAnalysis/coordinates/MOL2.py index 9176894a8ee..179bd6c4b5b 100644 --- a/package/MDAnalysis/coordinates/MOL2.py +++ b/package/MDAnalysis/coordinates/MOL2.py @@ -247,34 +247,40 @@ def close(self): def encode_block(self, obj): traj = obj.universe.trajectory - # Make sure Universe has loaded bonds - obj.universe.bonds - - bonds = set() - for a in obj.atoms: - bonds.update(a.bonds) - bonds = sorted([(bond[0].id, bond[1].id, bond.order) for bond in bonds]) - mapping = {a.id: i for i, a in enumerate(obj.atoms)} - - atom_lines = ["{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f} {5:>4} {6} {7} " - "{8:>7.4f}".format(mapping[a.id] + 1, a.name, - float(a.pos[0]), - float(a.pos[1]), - float(a.pos[2]), a.type, - a.resid, a.resname, - a.charge) for a in obj.atoms] - atom_lines = ["@ATOM"] + atom_lines + ["\n"] - atom_lines = "\n".join(atom_lines) - - bonds = [(atom1, atom2, order) for atom1, atom2, order in bonds if atom1 in mapping and atom2 in mapping] - bond_lines = ["{0:>5} {1:>5} {2:>5} {3:>2}" - "".format(bid + 1, - mapping[atom1] + 1, - mapping[atom2] + 1, - order) for bid, (atom1, atom2, order) in enumerate(bonds)] - bond_lines = ["@BOND"] + bond_lines + ["\n"] + # Need to remap atom indices to 1 based in this selection + mapping = {a: i for i, a in enumerate(obj.atoms, start=1)} + + # Grab only bonds between atoms in the obj + # ie none that extend out of it + bondgroup = obj.bonds.atomgroup_intersection(obj, strict=True) + bonds = sorted((b[0], b[1], b.order) for b in bondgroup) + bond_lines = ["@BOND"] + bond_lines.extend("{0:>5} {1:>5} {2:>5} {3:>2}" + "".format(bid, + mapping[atom1], + mapping[atom2], + order) + for bid, (atom1, atom2, order)in enumerate( + bonds, start=1)) + bond_lines.append("\n") bond_lines = "\n".join(bond_lines) + atom_lines = ["@ATOM"] + atom_lines.extend("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f}" + " {5:>4} {6} {7} {8:>7.4f}" + "".format(mapping[a], + a.name, + a.position[0], + a.position[1], + a.position[2], + a.type, + a.resid, + a.resname, + a.charge) + for a in obj.atoms) + atom_lines.append("\n") + atom_lines = "\n".join(atom_lines) + try: substructure = traj.substructure[traj.frame] except AttributeError: @@ -284,7 +290,7 @@ def encode_block(self, obj): molecule = traj.molecule[traj.frame] check_sums = molecule[1].split() - check_sums[0], check_sums[1] = str(len(obj.atoms)), str(len(bonds)) + check_sums[0], check_sums[1] = str(len(obj.atoms)), str(len(bondgroup)) molecule[1] = "{0}\n".format(" ".join(check_sums)) molecule = ["@MOLECULE\n"] + molecule From 06bd71feda8ca021c5c3f08d3d3cc69a4f29bd47 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 12 Jan 2016 13:09:32 +0000 Subject: [PATCH 2/3] Fixes Issue #521 MOL2Reader now reads `molecule` and `substructure` into `ts.data` under the keys `molecule` and `substructure` Fixed MOL2Reader not reading molecule and substructure on init. Fixed MOL2Writer loading the frame anew before writing (and overwriting any changes made to the frame by the user) MOL2Writer no longer writes an entire trajectory using the `write` method and instead only writes a single frame (to conform with all other Writers) To write an entire trajectory, use a loop --- package/CHANGELOG | 9 +- package/MDAnalysis/coordinates/MOL2.py | 114 +++++++----------- .../MDAnalysisTests/coordinates/test_mol2.py | 20 ++- 3 files changed, 65 insertions(+), 78 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index e82a32ec15c..44a460b8a68 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -45,10 +45,10 @@ Enhancement * Added analysis.rdf, with InterRDF tool. (Issue #460) * Made Reader.check_slice_indices a public method (Issue #604) * analysis.helanal.helanal_main() now returns results as dict - * Added keyword to update selection every frame in density calculation (Issue #584) + * Added keyword to update selection every frame in density calculation (Issue #584) * New keywords start, stop, step for density.density_from_Universe() to slice a trajectory. - + * MOL2Reader now reads molecule and substructure into ts.data Changes @@ -63,7 +63,7 @@ Changes the change from 1- to 0-based ts.frame counting) * removed superfluous analysis.density.density_from_trajectory(); use density_from_Universe(TOPOL, TRAJ) instead. - + * MOL2Writer.write now only writes a single frame (Issue #521) Fixes @@ -92,6 +92,9 @@ Fixes * Fixed broken analysis.helanal.helanal_trajectory() and helanal_main() * Fixed lib.util.greedy_splitext() (now returns full path) + * Fixed MOL2Reader not reading molecule and substructure on init + (Issue #521) + * Fixed MOL2Writer rereading frames when writing them (Issue #521) 10/08/15 diff --git a/package/MDAnalysis/coordinates/MOL2.py b/package/MDAnalysis/coordinates/MOL2.py index 179bd6c4b5b..6808b98b101 100644 --- a/package/MDAnalysis/coordinates/MOL2.py +++ b/package/MDAnalysis/coordinates/MOL2.py @@ -67,24 +67,15 @@ def __init__(self, filename, **kwargs): if "@MOLECULE" in line: blocks.append({"start_line": i, "lines": []}) blocks[-1]["lines"].append(line) + self.n_frames = len(blocks) + self.frames = blocks - block = blocks[0] - - sections, coords = self.parse_block(block) - + sections, coords = self.parse_block(blocks[0]) self.n_atoms = len(coords) - self.ts = self._Timestep.from_coordinates(np.array(coords, dtype=np.float32), - **self._ts_kwargs) - self.ts.frame = 0 # 0-based frame number as starting frame - if self.convert_units: - self.convert_pos_from_native(self.ts._pos) # in-place ! - self.convert_pos_from_native(self.ts._unitcell[:3]) # in-place ! (only lengths) + self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) - self.molecule = {} - self.substructure = {} - self.frames = blocks - self.n_frames = len(blocks) + self.ts = self._read_frame(0) def parse_block(self, block): sections = {} @@ -111,7 +102,7 @@ def parse_block(self, block): aid, name, x, y, z, atom_type, resid, resname, charge = a.split() x, y, z = float(x), float(y), float(z) coords.append((x, y, z)) - coords = np.array(coords) + coords = np.array(coords, dtype=np.float32) return sections, coords def _read_next_timestep(self, ts=None): @@ -119,32 +110,39 @@ def _read_next_timestep(self, ts=None): ts = self.ts else: # TODO: cleanup _read_frame() to use a "free" Timestep - raise NotImplementedError("PrimitiveMOL2Reader cannot assign to a timestep") + raise NotImplementedError("MOL2Reader cannot assign to a timestep") frame = self.frame + 1 return self._read_frame(frame) def _read_frame(self, frame): unitcell = np.zeros(6, dtype=np.float32) - block = self.frames[frame] + try: + block = self.frames[frame] + except IndexError: + raise IOError("Invalid frame {0} for trajectory with length {1}" + "".format(frame, len(self))) sections, coords = self.parse_block(block) - self.molecule[frame] = sections["molecule"] - self.substructure[frame] = sections["substructure"] + self.ts.data['molecule'] = sections["molecule"] + self.ts.data['substructure'] = sections["substructure"] # check if atom number changed - if len(coords) != len(self.ts._pos): + if len(coords) != self.n_atoms: raise ValueError( - "PrimitiveMOL2Reader assumes that the number of atoms remains unchanged between frames; the current " - "frame has %d, the next frame has %d atoms" % ( - len(self.ts._pos), len(coords))) + "MOL2Reader assumes that the number of atoms remains unchanged" + " between frames; the current " + "frame has {}, the next frame has {1} atoms" + "".format(self.n_atoms, len(coords))) self.ts.positions = np.array(coords, dtype=np.float32) - self.ts._unitcell[:] = unitcell + self.ts.unitcell = unitcell if self.convert_units: - self.convert_pos_from_native(self.ts._pos) # in-place ! - self.convert_pos_from_native(self.ts._unitcell[:3]) # in-place ! (only lengths) + # in-place ! + self.convert_pos_from_native(self.ts._pos) + self.convert_pos_from_native(self.ts._unitcell[:3]) self.ts.frame = frame + return self.ts def _reopen(self): @@ -211,21 +209,16 @@ class MOL2Writer(base.Writer): format = 'MOL2' units = {'time': None, 'length': 'Angstrom'} - def __init__(self, filename, n_atoms=None, start=0, step=1, + def __init__(self, filename, n_atoms=None, convert_units=None, multiframe=None): """Create a new MOL2Writer :Arguments: *filename* name of output file - *start* - starting timestep - *step* - skip between subsequent timesteps *convert_units* units are converted to the MDAnalysis base format; ``None`` selects the value of :data:`MDAnalysis.core.flags` ['convert_lengths'] - """ self.filename = filename if convert_units is None: @@ -233,11 +226,6 @@ def __init__(self, filename, n_atoms=None, start=0, step=1, self.convert_units = convert_units # convert length and time to base units self.frames_written = 0 - if start < 0: - raise ValueError("'Start' must be a positive value") - - self.start = start - self.step = step self.file = util.anyopen(self.filename, 'w') # open file on init @@ -245,7 +233,13 @@ def close(self): self.file.close() def encode_block(self, obj): + """ + Parameters + ---------- + obj : AtomGroup or Universe + """ traj = obj.universe.trajectory + ts = traj.ts # Need to remap atom indices to 1 based in this selection mapping = {a: i for i, a in enumerate(obj.atoms, start=1)} @@ -282,13 +276,12 @@ def encode_block(self, obj): atom_lines = "\n".join(atom_lines) try: - substructure = traj.substructure[traj.frame] - except AttributeError: + substructure = ["@SUBSTRUCTURE\n"] + substructure.extend(ts.data['substructure']) + except KeyError: raise NotImplementedError("No MOL2 substructure type found in traj") - substructure = ["@SUBSTRUCTURE\n"] + substructure - - molecule = traj.molecule[traj.frame] + molecule = ts.data['molecule'] check_sums = molecule[1].split() check_sums[0], check_sums[1] = str(len(obj.atoms)), str(len(bondgroup)) molecule[1] = "{0}\n".format(" ".join(check_sums)) @@ -297,41 +290,20 @@ def encode_block(self, obj): return "".join(molecule) + atom_lines + bond_lines + "".join(substructure) def write(self, obj): - """Write object *obj* at current trajectory frame to file. - - *obj* can be a selection (i.e. a - :class:`~MDAnalysis.core.AtomGroup.AtomGroup`) or a whole - :class:`~MDAnalysis.core.AtomGroup.Universe`. + """Write a new frame to the MOL2 file. - :Arguments: - *obj* - :class:`~MDAnalysis.core.AtomGroup.AtomGroup` or - :class:`~MDAnalysis.core.AtomGroup.Universe` + Parameters + ---------- + obj : AtomGroup or Universe """ - - start, step = self.start, self.step - traj = obj.universe.trajectory - - # Start from trajectory[0]/frame 1, if there are more than 1 frame. - # If there is only 1 frame, the traj.frames is not like a python list: - # accessing trajectory[-1] raises key error. - if not start and traj.n_frames > 1: - start = traj.frame - - for framenumber in xrange(start, len(traj), step): - traj[framenumber] - self.write_next_timestep(obj) - - self.close() - - # Set the trajectory to the starting position - traj[start] + self.write_next_timestep(obj) def write_next_timestep(self, obj): """Write a new frame to the MOL2 file. - *obj* can be a :class:`~MDAnalysis.core.AtomGroup.AtomGroup` - or a :class:`~MDAnalysis.core.AtomGroup.Universe`. + Parameters + ---------- + obj : AtomGroup or Universe """ block = self.encode_block(obj) self.file.writelines(block) diff --git a/testsuite/MDAnalysisTests/coordinates/test_mol2.py b/testsuite/MDAnalysisTests/coordinates/test_mol2.py index 4b7f8f1b5b4..895c8b9c0b4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_mol2.py +++ b/testsuite/MDAnalysisTests/coordinates/test_mol2.py @@ -19,6 +19,7 @@ from MDAnalysisTests.datafiles import mol2_molecules, mol2_molecule, mol2_broken_molecule from MDAnalysis import Universe +import MDAnalysis as mda class TestMol2(TestCase): @@ -45,10 +46,7 @@ def test_write(self): ref.atoms.write(self.outfile) u = Universe(self.outfile) assert_equal(len(u.atoms), len(ref.atoms)) - assert_equal(len(u.trajectory), len(ref.trajectory)) - assert_array_equal(u.atoms.positions, ref.atoms.positions) - u.trajectory[199] - ref.trajectory[199] + assert_equal(len(u.trajectory), 1) assert_array_equal(u.atoms.positions, ref.atoms.positions) def test_write_selection(self): @@ -59,6 +57,20 @@ def test_write_selection(self): gr1 = u.select_atoms("name C*") assert_equal(len(gr0), len(gr1)) + def test_write_in_loop(self): + ref = Universe(mol2_molecules) + + with mda.Writer(self.outfile) as W: + for ts in ref.trajectory: + W.write(ref.atoms) + u = Universe(self.outfile) + assert_equal(len(u.atoms), len(ref.atoms)) + assert_equal(len(u.trajectory), len(ref.trajectory)) + assert_array_equal(u.atoms.positions, ref.atoms.positions) + u.trajectory[199] + ref.trajectory[199] + assert_array_equal(u.atoms.positions, ref.atoms.positions) + def test_broken_molecule(self): assert_raises(ValueError, Universe, mol2_broken_molecule) From df5d7a210ba145b52c7d630de108a5ef2f716f1e Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 12 Jan 2016 22:20:46 +0000 Subject: [PATCH 3/3] QC fixes --- package/MDAnalysis/coordinates/MOL2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/MOL2.py b/package/MDAnalysis/coordinates/MOL2.py index 6808b98b101..67f8e4b7607 100644 --- a/package/MDAnalysis/coordinates/MOL2.py +++ b/package/MDAnalysis/coordinates/MOL2.py @@ -132,7 +132,7 @@ def _read_frame(self, frame): raise ValueError( "MOL2Reader assumes that the number of atoms remains unchanged" " between frames; the current " - "frame has {}, the next frame has {1} atoms" + "frame has {0}, the next frame has {1} atoms" "".format(self.n_atoms, len(coords))) self.ts.positions = np.array(coords, dtype=np.float32)