From 0cd61ac0ed2b2c65558fa83fed2a927aa363ac0f Mon Sep 17 00:00:00 2001 From: "William H.P. Nielsen" Date: Fri, 27 Oct 2017 12:47:54 +0200 Subject: [PATCH] add debugging and make the final write more robust (#822) * add debugging and make the final write more robust * add test based on bug report * logging: update old loggers * rename instrument to avoid error in test suite * save even in case of keyboardinterrupt * insert argument in all places --- qcodes/data/data_set.py | 29 +++++++++++++++++--------- qcodes/data/format.py | 8 ++++++-- qcodes/data/gnuplot_format.py | 23 +++++++++++++++++---- qcodes/data/hdf5_format.py | 13 ++++++++---- qcodes/loops.py | 38 ++++++++++++++++++++++++----------- qcodes/tests/test_loop.py | 28 ++++++++++++++++++++++++-- 6 files changed, 106 insertions(+), 33 deletions(-) diff --git a/qcodes/data/data_set.py b/qcodes/data/data_set.py index 097dcc1de70..0213d0b4867 100644 --- a/qcodes/data/data_set.py +++ b/qcodes/data/data_set.py @@ -11,6 +11,8 @@ from .location import FormatLocation from qcodes.utils.helpers import DelegateAttributes, full_class, deep_update +log = logging.getLogger(__name__) + def new_data(location=None, loc_record=None, name=None, overwrite=False, io=None, **kwargs): @@ -246,14 +248,14 @@ def complete(self, delay=1.5): Args: delay (float): seconds between iterations. Default 1.5 """ - logging.info( + log.info( 'waiting for DataSet <{}> to complete'.format(self.location)) failing = {key: False for key in self.background_functions} completed = False while True: - logging.info('DataSet: {:.0f}% complete'.format( + log.info('DataSet: {:.0f}% complete'.format( self.fraction_complete() * 100)) # first check if we're done @@ -264,13 +266,13 @@ def complete(self, delay=1.5): # because we want things like live plotting to get the final data for key, fn in list(self.background_functions.items()): try: - logging.debug('calling {}: {}'.format(key, repr(fn))) + log.debug('calling {}: {}'.format(key, repr(fn))) fn() failing[key] = False except Exception: - logging.info(format_exc()) + log.info(format_exc()) if failing[key]: - logging.warning( + log.warning( 'background function {} failed twice in a row, ' 'removing it'.format(key)) del self.background_functions[key] @@ -282,7 +284,7 @@ def complete(self, delay=1.5): # but only sleep if we're not already finished time.sleep(delay) - logging.info('DataSet <{}> is complete'.format(self.location)) + log.info('DataSet <{}> is complete'.format(self.location)) def get_changes(self, synced_indices): """ @@ -387,8 +389,11 @@ def store(self, loop_indices, ids_values): self.last_store = time.time() if (self.write_period is not None and time.time() > self.last_write + self.write_period): + log.debug('Attempting to write') self.write() self.last_write = time.time() + else: + log.debug('.store method: This is not the right time to write') def default_parameter_name(self, paramname='amplitude'): """ Return name of default parameter for plotting @@ -464,7 +469,7 @@ def read_metadata(self): return self.formatter.read_metadata(self) - def write(self, write_metadata=False): + def write(self, write_metadata=False, only_complete=True): """ Writes updates to the DataSet to storage. N.B. it is recommended to call data_set.finalize() when a DataSet is @@ -472,6 +477,9 @@ def write(self, write_metadata=False): Args: write_metadata (bool): write the metadata to disk + only_complete (bool): passed on to the match_save_range inside + self.formatter.write. Used to ensure that all new data gets + saved even when some columns are strange. """ if self.location is False: return @@ -479,7 +487,8 @@ def write(self, write_metadata=False): self.formatter.write(self, self.io, self.location, - write_metadata=write_metadata) + write_metadata=write_metadata, + only_complete=only_complete) def write_copy(self, path=None, io_manager=None, location=None): """ @@ -560,7 +569,9 @@ def finalize(self): Also closes the data file(s), if the ``Formatter`` we're using supports that. """ - self.write() + log.debug('Finalising the DataSet. Writing.') + # write all new data, not only (to?) complete columns + self.write(only_complete=False) if hasattr(self.formatter, 'close_file'): self.formatter.close_file(self) diff --git a/qcodes/data/format.py b/qcodes/data/format.py index cb0b8fe6756..c3fc5831593 100644 --- a/qcodes/data/format.py +++ b/qcodes/data/format.py @@ -48,7 +48,7 @@ class Formatter: ArrayGroup = namedtuple('ArrayGroup', 'shape set_arrays data name') def write(self, data_set, io_manager, location, write_metadata=True, - force_write=False): + force_write=False, only_complete=True): """ Write the DataSet to storage. @@ -63,6 +63,8 @@ def write(self, data_set, io_manager, location, write_metadata=True, location (str): the file location within the io_manager. write_metadata (bool): if True, then the metadata is written to disk force_write (bool): if True, then the data is written to disk + only_complete (bool): Used only by the gnuplot formatter's + overridden version of this method """ raise NotImplementedError @@ -190,7 +192,9 @@ def match_save_range(self, group, file_exists, only_complete=True): Returns: Tuple(int, int): the first and last raveled indices that should - be saved. + be saved. Returns None if: + * no data is present + * no new data can be found """ inner_setpoint = group.set_arrays[-1] full_dim_data = (inner_setpoint, ) + group.data diff --git a/qcodes/data/gnuplot_format.py b/qcodes/data/gnuplot_format.py index b121495a2c7..c2124572968 100644 --- a/qcodes/data/gnuplot_format.py +++ b/qcodes/data/gnuplot_format.py @@ -2,12 +2,16 @@ import re import math import json +import logging from qcodes.utils.helpers import deep_update, NumpyJSONEncoder from .data_array import DataArray from .format import Formatter +log = logging.getLogger(__name__) + + class GNUPlotFormat(Formatter): """ Saves data in one or more gnuplot-format files. We make one file for @@ -239,7 +243,8 @@ def _get_labels(self, labelstr): parts = re.split('"\s+"', labelstr[1:-1]) return [l.replace('\\"', '"').replace('\\\\', '\\') for l in parts] - def write(self, data_set, io_manager, location, force_write=False, write_metadata=True): + def write(self, data_set, io_manager, location, force_write=False, + write_metadata=True, only_complete=True): """ Write updates in this DataSet to storage. @@ -249,6 +254,11 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat data_set (DataSet): the data we're storing io_manager (io_manager): the base location to write to location (str): the file location within io_manager + only_complete (bool): passed to match_save_range, answers the + following question: Should we write all available new data, + or only complete rows? Is used to make sure that everything + gets written when the DataSet is finalised, even if some + dataarrays are strange (like, full of nans) """ arrays = data_set.arrays @@ -257,16 +267,20 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat existing_files = set(io_manager.list(location)) written_files = set() - # Every group gets it's own datafile + # Every group gets its own datafile for group in groups: + log.debug('Attempting to write the following ' + 'group: {}'.format(group)) fn = io_manager.join(location, group.name + self.extension) written_files.add(fn) file_exists = fn in existing_files - save_range = self.match_save_range(group, file_exists) + save_range = self.match_save_range(group, file_exists, + only_complete=only_complete) if save_range is None: + log.debug('Cannot match save range, skipping this group.') continue overwrite = save_range[0] == 0 or force_write @@ -276,6 +290,7 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat with io_manager.open(fn, open_mode) as f: if overwrite: f.write(self._make_header(group)) + log.debug('Wrote header to file') for i in range(save_range[0], save_range[1] + 1): indices = np.unravel_index(i, shape) @@ -291,7 +306,7 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat one_point = self._data_point(group, indices) f.write(self.separator.join(one_point) + self.terminator) - + log.debug('Wrote to file') # now that we've saved the data, mark it as such in the data. # we mark the data arrays and the inner setpoint array. Outer # setpoint arrays have different dimension (so would need a diff --git a/qcodes/data/hdf5_format.py b/qcodes/data/hdf5_format.py index 8e059019ddb..08a510262dd 100644 --- a/qcodes/data/hdf5_format.py +++ b/qcodes/data/hdf5_format.py @@ -126,7 +126,8 @@ def _create_data_object(self, data_set, io_manager=None, return data_set._h5_base_group def write(self, data_set, io_manager=None, location=None, - force_write=False, flush=True, write_metadata=True): + force_write=False, flush=True, write_metadata=True, + only_complete=False): """ Writes a data_set to an hdf5 file. @@ -137,12 +138,16 @@ def write(self, data_set, io_manager=None, location=None, force_write (bool): if True creates a new file to write to flush (bool) : whether to flush after writing, can be disabled for testing or performance reasons + only_complete (bool): Not used by this formatter, but must be + included in the call signature to avoid an "unexpected + keyword argument" TypeError. N.B. It is recommended to close the file after writing, this can be done by calling ``HDF5Format.close_file(data_set)`` or - ``data_set.finalize()`` if the data_set formatter is set to an hdf5 formatter. - Note that this is not required if the dataset is created from a Loop as this - includes a data_set.finalize() statement. + ``data_set.finalize()`` if the data_set formatter is set to an + hdf5 formatter. Note that this is not required if the dataset + is created from a Loop as this includes a data_set.finalize() + statement. The write function consists of two parts, writing DataArrays and writing metadata. diff --git a/qcodes/loops.py b/qcodes/loops.py index 5a898e0ceec..1d765a680bb 100644 --- a/qcodes/loops.py +++ b/qcodes/loops.py @@ -63,9 +63,11 @@ log = logging.getLogger(__name__) + def active_loop(): return ActiveLoop.active_loop + def active_data_set(): loop = active_loop() if loop is not None and loop.data_set is not None: @@ -73,6 +75,7 @@ def active_data_set(): else: return None + class Loop(Metadatable): """ The entry point for creating measurement loops @@ -780,16 +783,16 @@ def _compile_one(self, action, new_action_indices): return action def _run_wrapper(self, *args, **kwargs): - # try: - self._run_loop(*args, **kwargs) - # finally: - if hasattr(self, 'data_set'): - # TODO (giulioungaretti) WTF? - # somehow this does not show up in the data_set returned by - # run(), but it is saved to the metadata - ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - self.data_set.add_metadata({'loop': {'ts_end': ts}}) - self.data_set.finalize() + try: + self._run_loop(*args, **kwargs) + finally: + if hasattr(self, 'data_set'): + # TODO (giulioungaretti) WTF? + # somehow this does not show up in the data_set returned by + # run(), but it is saved to the metadata + ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + self.data_set.add_metadata({'loop': {'ts_end': ts}}) + self.data_set.finalize() def _run_loop(self, first_delay=0, action_indices=(), loop_indices=(), current_values=(), @@ -835,12 +838,15 @@ def _run_loop(self, first_delay=0, action_indices=(), new_values = current_values + (value,) data_to_store = {} - if hasattr(self.sweep_values, "parameters"): # combined parameter + if hasattr(self.sweep_values, "parameters"): # combined parameter set_name = self.data_set.action_id_map[action_indices] if hasattr(self.sweep_values, 'aggregate'): value = self.sweep_values.aggregate(*set_val) + log.debug('Calling .store method of DataSet because ' + 'sweep_values.parameters exist') self.data_set.store(new_indices, {set_name: value}) - for j, val in enumerate(set_val): # set_val list of values to set [param1_setpoint, param2_setpoint ..] + # set_val list of values to set [param1_setpoint, param2_setpoint ..] + for j, val in enumerate(set_val): set_index = action_indices + (j+n_callables, ) set_name = (self.data_set.action_id_map[set_index]) data_to_store[set_name] = val @@ -848,6 +854,8 @@ def _run_loop(self, first_delay=0, action_indices=(), set_name = self.data_set.action_id_map[action_indices] data_to_store[set_name] = value + log.debug('Calling .store method of DataSet because a sweep step' + ' was taken') self.data_set.store(new_indices, data_to_store) if not self._nest_first: @@ -856,6 +864,8 @@ def _run_loop(self, first_delay=0, action_indices=(), try: for f in callables: + log.debug('Going through callables at this sweep step.' + ' Calling {}'.format(f)) f(first_delay=delay, loop_indices=new_indices, current_values=new_values) @@ -889,14 +899,18 @@ def _run_loop(self, first_delay=0, action_indices=(), # run the background task one last time to catch the last setpoint(s) if self.bg_task is not None: + log.debug('Running the background task one last time.') self.bg_task() # the loop is finished - run the .then actions + log.debug('Finishing loop, running the .then actions...') for f in self._compile_actions(self.then_actions, ()): + log.debug('...running .then action {}'.format(f)) f() # run the bg_final_task from the bg_task: if self.bg_final_task is not None: + log.debug('Running the bg_final_task') self.bg_final_task() def _wait(self, delay): diff --git a/qcodes/tests/test_loop.py b/qcodes/tests/test_loop.py index 833192ce02e..1716e2aabd3 100644 --- a/qcodes/tests/test_loop.py +++ b/qcodes/tests/test_loop.py @@ -3,16 +3,29 @@ from unittest import TestCase import numpy as np from unittest.mock import patch +import os from qcodes.loops import Loop from qcodes.actions import Task, Wait, BreakIf, _QcodesBreak from qcodes.station import Station from qcodes.data.data_array import DataArray -from qcodes.instrument.parameter import Parameter +from qcodes.instrument.parameter import Parameter, MultiParameter from qcodes.utils.validators import Numbers from qcodes.utils.helpers import LogCapture -from .instrument_mocks import MultiGetter +from .instrument_mocks import MultiGetter, DummyInstrument + + +class NanReturningParameter(MultiParameter): + + def __init__(self, name, instrument, names=('first', 'second'), + shapes=((), ())): + + super().__init__(name=name, names=names, shapes=shapes, + instrument=instrument) + + def get_raw(self): # this results in a nan-filled DataArray + return (13,) class TestLoop(TestCase): @@ -21,6 +34,8 @@ def setUpClass(cls): cls.p1 = Parameter('p1', get_cmd=None, set_cmd=None, vals=Numbers(-10, 10)) cls.p2 = Parameter('p2', get_cmd=None, set_cmd=None, vals=Numbers(-10, 10)) cls.p3 = Parameter('p3', get_cmd=None, set_cmd=None, vals=Numbers(-10, 10)) + instr = DummyInstrument('dummy_bunny') + cls.p4_crazy = NanReturningParameter('p4_crazy', instrument=instr) Station().set_measurement(cls.p2, cls.p3) def test_nesting(self): @@ -93,6 +108,15 @@ def test_repr(self): ' Measured | p1 | p1 | (2,)') self.assertEqual(data.__repr__(), expected) + def test_measurement_with_many_nans(self): + loop = Loop(self.p1.sweep(0, 1, num=10), + delay=0.05).each(self.p4_crazy) + ds = loop.get_data_set() + loop.run() + + # assert that both the snapshot and the datafile are there + self.assertEqual(len(os.listdir(ds.location)), 2) + def test_default_measurement(self): self.p2.set(4) self.p3.set(5)