From a71da17e4f9cd398d0d85503657a5fc61e35fcea Mon Sep 17 00:00:00 2001 From: Alex Pearce Date: Wed, 21 Aug 2019 14:23:51 +0200 Subject: [PATCH] Unconditionally save algorithm instantiation traceback. Towards #58. --- PyConf/python/PyConf/application.py | 91 ++++++++++++++++---- PyConf/python/PyConf/components.py | 63 ++++++++++++++ PyConf/python/PyConf/tests/test_algorithm.py | 30 +++++++ PyConf/tests/qmtest/test_example.qmt | 17 ++++ 4 files changed, 184 insertions(+), 17 deletions(-) diff --git a/PyConf/python/PyConf/application.py b/PyConf/python/PyConf/application.py index be0d53cda06..d93714bbbfe 100644 --- a/PyConf/python/PyConf/application.py +++ b/PyConf/python/PyConf/application.py @@ -134,28 +134,47 @@ class ApplicationOptions(ConfigurableUser): __slots__ = { # input related 'input_files': [], # type is list of str - 'input_type': '', # TODO use enum - 'input_raw_format': 0.3, - 'use_iosvc': False, - 'evt_max': -1, + 'input_type': + '', # TODO use enum + 'input_raw_format': + 0.3, + 'use_iosvc': + False, + 'evt_max': + -1, # conditions for processing - 'data_type': 'Upgrade', - 'dddb_tag': '', - 'conddb_tag': '', - 'simulation': True, + 'data_type': + 'Upgrade', + 'dddb_tag': + '', + 'conddb_tag': + '', + 'simulation': + True, # output related - 'output_file': '', - 'output_type': '', - 'histo_file': '', - 'output_level': INFO, + 'output_file': + '', + 'output_type': + '', + 'histo_file': + '', + 'output_level': + INFO, # multithreaded processing - 'n_threads': 1, - 'event_store': 'HiveWhiteBoard', # one of EvtStoreSvc, HiveWhiteBoard - 'n_event_slots': -1, # defaults to 1.2 * n_threads + 'n_threads': + 1, + 'event_store': + 'HiveWhiteBoard', # one of EvtStoreSvc, HiveWhiteBoard + 'n_event_slots': + -1, # defaults to 1.2 * n_threads # TODO such defaults can be expressed here explicitly # debugging - 'control_flow_file': 'control_flow.gv', - 'data_flow_file': 'data_flow.gv', + 'control_flow_file': + 'control_flow.gv', + 'data_flow_file': + 'data_flow.gv', + 'algorithm_instantiation_traceback_file': + 'algorithm_instantiation_traces.txt', } _propertyDocDct = { @@ -495,6 +514,8 @@ def configure(options, control_flow_node, public_tools=[]): control_flow_node, filename=fn_root, extensions=[fn_ext[1:]]) fn_root, fn_ext = os.path.splitext(options.data_flow_file) plot_data_flow(algs, filename=fn_root, extensions=[fn_ext[1:]]) + save_algorithm_instantiations( + algs, options.algorithm_instantiation_traceback_file) # Make sure nothing's configuring the DoD behind our back, e.g. LHCbApp appendPostConfigAction(assert_empty_dataondemand_service) @@ -572,3 +593,39 @@ def plot_control_flow(top_node, filename='control_flow', extensions=('gv', )): for ext in extensions: format = 'raw' if ext == 'gv' else ext graph.write('{}.{}'.format(filename, ext), format=format) + + +def save_algorithm_instantiations(algorithms, filename): + """Log instantiation traces for each algorithm to `filename`. + + Algorithms are printed alphabetically by their `fullname`. + """ + log.info('Creating algorithm instantiation logfile {}'.format(filename)) + + algs = set() + for alg in algorithms: + algs.update(alg.dependencies()) + traces = [ + alg.instantiation_stack_trace() + for alg in sorted(algs, key=lambda alg: alg.fullname) + ] + + # Create the file, overwriting anything that may already be there + with open(filename, 'wt') as f: + now = _gaudi_datetime_format(datetime.datetime.now()) + f.write("""# Algorithm instantation log created at {created_at} +# +# Each block below shows the traceback for an instantiation +# of an Algorithm. The full name of the Algorithm instance +# is shown at the beginning of each block. +# +# Colours are used for legibility, just `cat` this file to a terminal to see +# them or send it to a pager like `less`: +# +# cat this_file.txt | less -r +# +# The colours can be stripped with the `sed` utility: +# +# sed -r "s/[[:cntrl:]]\[([0-9]{{1,3}};)*[0-9]{{1,3}}m//g" this_file.txt +{traces} +""".format(created_at=now, traces='\n'.join(traces))) diff --git a/PyConf/python/PyConf/components.py b/PyConf/python/PyConf/components.py index 17980535885..04bb935dc35 100644 --- a/PyConf/python/PyConf/components.py +++ b/PyConf/python/PyConf/components.py @@ -25,12 +25,14 @@ try: except ImportError: from cgi import escape as html_escape import inspect +import logging import re import json import pydot from GaudiKernel.ConfigurableMeta import ConfigurableMeta +from IPython.core import ultratb from . import ConfigurationError, configurable from .dataflow import DataHandle, configurable_outputs, configurable_inputs, dataflow_config, contains_datahandle @@ -47,9 +49,15 @@ __all__ = [ 'setup_component', ] +log = logging.getLogger(__name__) + # String that separates a name from its unique ID _UNIQUE_SEPARATOR = '#' _UNIQUE_PREFIXES = defaultdict(lambda: 0) +# Number of lines of context to show for instantiation stack debugging +_STACK_CONTEXT = 3 +# Formatter for stack frame records +_TRACEBACK_FORMATTER = ultratb.VerboseTB() _FLOW_GRAPH_NODE_COLOUR = 'aliceblue' _FLOW_GRAPH_INPUT_COLOUR = 'deepskyblue1' @@ -241,6 +249,41 @@ def _get_and_check_properties(t, props): return OrderedDict(props) +def _stack_frame_filter(frame_record): + """Return True if a frame record should be displayed for debugging. + + Filters out records where the filename is inside PyConf or Gaudi such that + only frames in 'user code' are kept. + """ + _, filename, _, _, _, _ = frame_record + # This value is likely to be PyConf, but get it dynamically just in case + # the module is renamed + this_module = __name__.split('.')[0] + return (this_module not in filename and 'GaudiKernel' not in filename + and 'gaudirun.py' not in filename and + # This is the 'file' exec'd by gaudirun.py + '' not in filename) + + +def _format_stack_frames(stack, frame_record_filter=None): + """Return a stack of frame records as a traceback string. + + Parameters + ---------- + stack -- list + Frame records objects, as returned by `inspect.stack()`. + frame_record_filter -- callable + When called with a frame record object, returns True if a frame should + be displayed. + """ + # The standard Python traceback marker + s = 'Traceback (most recent call last):\n' + for frame_record in reversed(filter(frame_record_filter, stack)): + s += _TRACEBACK_FORMATTER.format_record(*frame_record) + s += '\n' + return s + + class force_location(str): """An indicator that a location must be set as defined. @@ -406,6 +449,8 @@ class Algorithm(object): instance._requireIOV = require_IOVLock instance._tools = _tools instance._outputs_define_identity = bool(forced_locations) + instance._instantiation_stack_frames = inspect.stack( + context=_STACK_CONTEXT) #make OUTPUTS ############# these were not needed for calculation of the ID (only the forced locations) def _make_outs(outputs): @@ -526,6 +571,15 @@ class Algorithm(object): all_inputs.update(_inputs_from_tool(tool)) return all_inputs + def dependencies(self): + """Return the set of all Algorithm dependencies, including itself.""" + deps = {self} + for inputs in self.inputs.values(): + inputs = inputs if isinstance(inputs, list) else [inputs] + for inp in inputs: + deps.update(inp.producer.dependencies()) + return deps + def configuration(self): config = dataflow_config() @@ -695,6 +749,15 @@ class Algorithm(object): 'cannot change member after initialization') return object.__delattr__(self, a) + def instantiation_stack_trace(self, + frame_record_filter=_stack_frame_filter): + """Return the stack trace string as it was at instantiation.""" + s = '# Algorithm instantiation: {!r}\n'.format(self.fullname) + s += _format_stack_frames( + self._instantiation_stack_frames, + frame_record_filter=frame_record_filter) + return s + # TODO(AP): maybe wrap_algorithm is better def make_algorithm(alg_type, diff --git a/PyConf/python/PyConf/tests/test_algorithm.py b/PyConf/python/PyConf/tests/test_algorithm.py index f4505c79476..9b4713bbeb0 100644 --- a/PyConf/python/PyConf/tests/test_algorithm.py +++ b/PyConf/python/PyConf/tests/test_algorithm.py @@ -8,6 +8,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization # # or submit itself to any jurisdiction. # ############################################################################### +from __future__ import absolute_import, division, print_function import pytest import re @@ -134,3 +135,32 @@ def algorithm_unequal_hashes(): def algorithm_hash_forced_outputs(): """An Algorithm with a forced output should have a different hash to one without.""" pass + + +def test_algorithm_instantiation_stack_trace(): + """An Algorithm should be able to show its instantiation traceback.""" + + def make_producer(): + return Algorithm( + IntDataProducer, + name="TracebackAlg", + # Change a property so that we *instantiate* an Algorithm here, + # not using a cached one + OutputLevel=5) + + def frame_record_filter(frame_record): + _, filename, _, _, _, _ = frame_record + # Filter out frames from py.test and Python itself + return 'site-packages' not in filename and 'runpy.py' not in filename + + # As this test is within PyConf, we don't want to filter out frame records + # from PyConf files + trace = make_producer().instantiation_stack_trace( + frame_record_filter=frame_record_filter) + + assert "Algorithm instantiation: 'Gaudi__Examples__IntDataProducer/TracebackAlg" in trace + # The traceback should show the source an the point of instantiation + # (remember that the IPython printout is decorated with terminal escape + # codes) + assert 'make_producer' in trace + assert 'OutputLevel' in trace diff --git a/PyConf/tests/qmtest/test_example.qmt b/PyConf/tests/qmtest/test_example.qmt index 995c87edb84..cd204435fcc 100644 --- a/PyConf/tests/qmtest/test_example.qmt +++ b/PyConf/tests/qmtest/test_example.qmt @@ -17,6 +17,7 @@ Run the example options and ensure they produce no warnings or errors. $PYCONFROOT/options/example.py +true countErrorLines({"FATAL": 0, "ERROR": 0, "WARNING": 0}) @@ -33,5 +34,21 @@ if "Gaudi__Examples__IntDataProducer#2" in stdout: # Should see correct order of values (first overridden, then default) if "Got value 123\nGot value 1\n" not in stdout: causes.append("Bind override of int_maker failed") + +# Should see the dump of the algorithm instantiations in the correct format +import os +fname_traces = "algorithm_instantiation_traces.txt" +if not os.path.exists(fname_traces): + causes.append("Algorithm instantiation traceback file not found: {}".format(fname_traces)) +else: + with open(fname_traces) as f: + lines = f.readlines() + if not lines[0].startswith("# Algorithm instantation log created at "): + causes.append("Malformed algorithm instantiation log") + ntracebacks = len([l for l in lines if l.startswith("Traceback ")]) + # The example options create 4 unique Algorithm instances + nexpected = 4 + if ntracebacks != nexpected: + causes.append("Expected {} tracebacks, found {}".format(nexpected, ntracebacks)) -- GitLab