class CouplingGraphFactory:
    """Factory to build instances of :class:`.DependencyGraph`.

    A graph is identified by the *spec* of its disciplines: a tuple, sorted by
    discipline name, of ``(name, sorted input names, sorted output names)``.
    A graph already built for a spec is returned from a memory cache instead of
    being re-computed. Optionally, the graphs are also persisted in a disk cache
    located in a given directory.

    The disk cache uses two kinds of pickle files:

    - ``hash_to_path.pkl``: A pickled dictionary with the hashes of the specs as
      keys and the names of the corresponding ``spec_to_graph`` pickle files as
      values.
    - ``spec_to_graph_SOME_UNIQUE_ID.pkl``: A pickled dictionary with the specs as
      keys and their corresponding graphs as values. Each pickled dictionary
      has a unique id in its file name that is stored in ``hash_to_path`` after it
      is created.

    Warning:
        The disk cache is read with :mod:`pickle`, which can execute arbitrary
        code while loading: only use a cache directory whose content is trusted.
    """

    __HASH_TO_PATH = "hash_to_path.pkl"
    __SPEC_TO_GRAPH = "spec_to_graph_.pkl"

    def __init__(self, coupling_cache_dir: "str | Path | None" = None) -> None:
        """
        Args:
            coupling_cache_dir: The path to the directory where the pickled files
                needed for the disk cache are stored or will be stored.
                If ``None``, do not use a disk cache, only a memory cache.
        """
        self.__coupling_cache_dir = (
            None if coupling_cache_dir is None else Path(coupling_cache_dir)
        )
        # Hash of a spec -> name of the pickle file storing its graph.
        self.__hash_to_spec_file_path = {}
        # Spec -> graph, for all the graphs known by this factory.
        self.instances = {}
        # Hash of a spec -> spec, for the graphs requested through this factory.
        self.__hashes = {}

        if self.__coupling_cache_dir is None:
            return

        index_path = self.__coupling_cache_dir / self.__HASH_TO_PATH
        try:
            with index_path.open("rb") as index_file:
                self.__hash_to_spec_file_path = pickle.load(index_file)
        except FileNotFoundError:
            msg = (
                "No previous coupling graph disk cache found, the path %s will "
                "be used to store it."
            )
            LOGGER.info(msg, self.__coupling_cache_dir)
        else:
            msg = "Found coupling graph disk cache in %s"
            LOGGER.info(msg, self.__coupling_cache_dir)

    @staticmethod
    def __get_spec(
        disciplines: "Iterable[MDODiscipline]",
    ) -> "tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...]":
        """Get the spec of the given disciplines.

        Args:
            disciplines: The disciplines to get the spec from.

        Returns:
            One ``(name, sorted input names, sorted output names)`` entry per
            discipline, sorted such that the spec does not depend on the order of
            the disciplines.
        """
        # Sort the entries themselves instead of going through a name-indexed
        # dictionary: disciplines sharing a name then keep one entry each,
        # while the result is unchanged when the names are unique.
        return tuple(
            sorted(
                (
                    disc.name,
                    tuple(sorted(disc.get_input_data_names())),
                    tuple(sorted(disc.get_output_data_names())),
                )
                for disc in disciplines
            )
        )

    @staticmethod
    def __hash_spec(
        spec: "tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...]",
    ) -> str:
        """Compute the hash identifying a spec in the caches.

        Args:
            spec: The spec to be hashed.

        Returns:
            The hexadecimal XXH3 64-bit digest of the string form of the spec.
        """
        # Imported from the public namespace of the package rather than from
        # its private ``_xxhash`` extension module.
        from xxhash import xxh3_64_hexdigest

        return xxh3_64_hexdigest(bytes(str(spec), "utf-8"))

    def create(self, disciplines: "Iterable[MDODiscipline]") -> "DependencyGraph":
        """Create the :class:`.DependencyGraph` for the given disciplines.

        The graph is looked up in the memory cache first, then in the disk cache
        if any, and is built and cached only when both lookups fail.

        Args:
            disciplines: The disciplines used to create the
                :class:`.DependencyGraph`.

        Returns:
            The :class:`.DependencyGraph` of the given disciplines.
        """
        # The disciplines are read twice (spec, then graph construction):
        # materialize one-shot iterables so that the second pass is not empty.
        if not isinstance(disciplines, (list, tuple)):
            disciplines = list(disciplines)

        spec = self.__get_spec(disciplines)
        hashed_spec = self.__hash_spec(spec)

        graph = self.__get_graph_from_memory(spec, hashed_spec)
        if graph is None and self.__coupling_cache_dir is not None:
            graph = self.__get_graph_from_disk(spec, hashed_spec)
            if graph is not None:
                # Let the next request for this spec be served from memory.
                self.__hashes[hashed_spec] = spec

        if graph is not None:
            return graph

        # TODO: maybe pass the discipline ids and the lists of inputs and outputs
        # directly, e.g. through a discipline proxy object exposing ``name``,
        # ``get_input_data_names`` and ``get_output_data_names``.
        graph = DependencyGraph(disciplines)
        self.instances[spec] = graph
        self.__hashes[hashed_spec] = spec

        if self.__coupling_cache_dir is not None:
            self.__save_to_disk(hashed_spec)

        return graph

    def __save_to_disk(self, hashed_spec: str) -> None:
        """Persist the known graphs and reference them in the index file.

        Args:
            hashed_spec: The hash of the spec of the graph that has just been built.
        """
        cache_dir = self.__coupling_cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)
        spec_file_name = self.__SPEC_TO_GRAPH.replace(".pkl", f"{uuid4()}.pkl")

        # Write the graphs before the index,
        # so that the index never references a file that does not exist yet.
        with (cache_dir / spec_file_name).open("wb") as spec_file:
            pickle.dump(self.instances, spec_file)

        self.__hash_to_spec_file_path[hashed_spec] = spec_file_name
        with (cache_dir / self.__HASH_TO_PATH).open("wb") as index_file:
            pickle.dump(self.__hash_to_spec_file_path, index_file)

    def __get_graph_from_memory(
        self,
        spec: "tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...]",
        hashed_spec: str,
    ) -> "DependencyGraph | None":
        """Load a dependency graph from memory.

        Args:
            spec: The spec that corresponds to the dependency graph to be recovered.
            hashed_spec: The hash of the spec.

        Returns:
            The dependency graph if available. Otherwise, return ``None``.
        """
        if hashed_spec not in self.__hashes:
            return None

        return self.instances.get(spec)

    def __get_graph_from_disk(
        self,
        spec: "tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...]",
        hashed_spec: str,
    ) -> "DependencyGraph | None":
        """Load a dependency graph from disk.

        The graphs found in the pickle file are added to :attr:`instances`,
        without replacing the graphs that are already in memory.

        Args:
            spec: The spec that corresponds to the dependency graph to be recovered.
            hashed_spec: The hash of the spec.

        Returns:
            The dependency graph if available. Otherwise, return ``None``.
        """
        spec_file_name = self.__hash_to_spec_file_path.get(hashed_spec)
        if spec_file_name is None:
            return None

        spec_file_path = self.__coupling_cache_dir / spec_file_name
        try:
            with spec_file_path.open("rb") as spec_file:
                # NOTE: unpickling runs arbitrary code, the cache must be trusted.
                cached_instances = pickle.load(spec_file)
        except FileNotFoundError:
            # Stale index entry: handle it as a cache miss, the graph will be
            # re-computed and stored again by the caller.
            LOGGER.warning(
                "The coupling graph cache file %s is missing, "
                "the graph will be re-computed.",
                spec_file_path,
            )
            return None

        for cached_spec, cached_graph in cached_instances.items():
            self.instances.setdefault(cached_spec, cached_graph)

        return self.instances.get(spec)
z%`gJzAB_yt9$r}7fK^P%Fb4&mCDCz_VVz-uw65I=+E_&^>9W$onD;^O?GDSkbC@tMU4qf@4I_6Wndi6xo&dGWdVDXCL> zSQ1N1@~8Ch$ET#GC6?wuJy$#>!ylZdGJ-%Um#IfIuQa!y5>lh(g8T<9jdftL&jZdveO^z?f&&f=# zoYKP+wYlM-Y-9{F&EDlBn29O}{gyQ1T9@h8*P@@MFDPUb$+>8tiP{mp7 YAT~%%acOe$lq^<828Pn4;-q3d0DW4OUjP6A literal 0 HcmV?d00001 diff --git a/tests/core/test_dependency_graph.py b/tests/core/test_dependency_graph.py index 4fcf89bf3b..12526441dd 100644 --- a/tests/core/test_dependency_graph.py +++ b/tests/core/test_dependency_graph.py @@ -20,7 +20,9 @@ import json from pathlib import Path +import networkx as nx import pytest +from gemseo.core.dependency_graph import CouplingGraphFactory from gemseo.core.dependency_graph import DependencyGraph from gemseo.core.discipline import MDODiscipline from gemseo.problems.sellar.sellar import Sellar1 @@ -31,8 +33,10 @@ from gemseo.problems.sobieski.disciplines import SobieskiMission from gemseo.problems.sobieski.disciplines import SobieskiPropulsion from gemseo.problems.sobieski.disciplines import SobieskiStructure from numpy import ones +from xxhash._xxhash import xxh3_64_hexdigest DATA_PATH = Path(__file__).absolute().parent / "data" / "dependency-graph" +DIRNAME = Path(__file__).parent DISC_DESCRIPTIONS = { "3-weak": { @@ -184,3 +188,100 @@ class DisciplineEncoder(json.JSONEncoder): if isinstance(o, MDODiscipline): return str(o) return super().default(o) + + +@pytest.mark.parametrize( + "classes_1,classes_2,expected_equal", + [ + ((Sellar1, Sellar2), (Sellar1, Sellar2), True), + ((Sellar2, Sellar1), (Sellar1, Sellar2), True), + ((Sellar1, Sellar2), (Sellar1, SellarSystem), False), + ], +) +def test_coupling_graph_factory_from_memory(classes_1, classes_2, expected_equal): + """Test that the memory cache works properly. + + Args: + classes_1: A tuple of disciplines. + classes_2: A tuple of disciplines. + expected_equal: Whether the graphs of the disciplines are expected to be equal. 
+ """ + factory = CouplingGraphFactory() + + disciplines_1 = create_disciplines_from_desc(classes_1) + disciplines_2 = create_disciplines_from_desc(classes_2) + + coupl1 = factory.create(disciplines_1) + coupl2 = factory.create(disciplines_2) + assert (id(coupl2) == id(coupl1)) == expected_equal + + +@pytest.mark.parametrize( + "classes_1,classes_2,expected_equal", + [ + ( + (Sellar1, Sellar2), + (Sellar1, Sellar2), + True, + ), + ( + (Sellar2, Sellar1), + (Sellar1, Sellar2), + True, + ), + ( + (Sellar1, Sellar2), + (Sellar1, SellarSystem), + False, + ), + ], +) +def test_coupling_graph_factory_from_disk( + classes_1, classes_2, expected_equal, tmp_wd, caplog +): + """Test that the disk cache works properly. + + Args: + classes_1: A tuple of disciplines. + classes_2: A tuple of disciplines. + expected_equal: Whether the graphs of the disciplines are expected to be equal. + tmp_wd : Fixture to move into a temporary directory. + caplog: Fixture to access and control log capturing. + """ + factory = CouplingGraphFactory(coupling_cache_dir=tmp_wd) + assert "No previous coupling graph disk cache found" in caplog.text + + disciplines_1 = create_disciplines_from_desc(classes_1) + disciplines_2 = create_disciplines_from_desc(classes_2) + + coupl1 = factory.create(disciplines_1) + coupl2 = factory.create(disciplines_2) + assert ( + nx.is_isomorphic(coupl1._DependencyGraph__graph, coupl2._DependencyGraph__graph) + == expected_equal + ) + + +def test_coupling_factory_existing_files(caplog): + """Test that the disk cache recovers graphs from previous runs. + + Args: + caplog: Fixture to access and control log capturing. 
+ """ + factory = CouplingGraphFactory(coupling_cache_dir=DATA_PATH) + assert f"Found coupling graph disk cache in {DATA_PATH}" in caplog.text + + another_factory = CouplingGraphFactory() + + disciplines = [Sellar1(), Sellar2()] + + spec = another_factory._CouplingGraphFactory__get_spec(disciplines) + hashed_spec = xxh3_64_hexdigest(bytes(str(spec), "utf-8")) + + graph = factory._CouplingGraphFactory__get_graph_from_disk(spec, hashed_spec) + graph_2 = another_factory.create(disciplines) + + assert graph is not None + assert nx.is_isomorphic( + graph._DependencyGraph__graph, graph_2._DependencyGraph__graph + ) -- GitLab