class CouplingGraphFactory:
    """Factory to build instances of :class:`.DependencyGraph`.

    The factory always keeps the graphs it has built in a memory cache, so that
    asking twice for the same set of disciplines returns the same object.
    When a cache directory is given, the graphs are also persisted on disk so
    that they can be recovered by another factory or another run.

    The disk cache uses two kinds of pickle files:

    - ``hash_to_path.pkl``: A pickled dictionary with the hashes of the specs as
      keys and the file names of the corresponding ``spec_to_graph`` pickle
      files as values.
    - ``spec_to_graph_SOME_UNIQUE_ID.pkl``: A pickled dictionary with the specs
      as keys and their corresponding graphs as values. Each of these files has
      a unique id in its name, which is recorded in ``hash_to_path.pkl`` once
      the file has been written.

    A *spec* is the name-sorted tuple of
    ``(discipline name, sorted input names, sorted output names)``.

    Warning:
        The disk cache is read with :mod:`pickle`; only point the factory to
        a directory whose content is trusted.
    """

    __HASH_TO_PATH = "hash_to_path.pkl"
    __SPEC_TO_GRAPH = "spec_to_graph_.pkl"

    def __init__(self, coupling_cache_dir: str | Path | None = None):
        """
        Args:
            coupling_cache_dir: The path to the directory where the pickled files
                needed for the disk cache are stored or will be stored.
                If ``None``, do not use a disk cache, only a memory cache.
        """
        self.__coupling_cache_dir = None
        # Hash of a spec -> file name of the pickle holding its graph.
        self.__hash_to_spec_file_path = {}
        # Spec -> graph; this is the memory cache.
        self.instances = {}
        # Hash of a spec -> spec, for the specs available in the memory cache.
        self.__hashes = {}

        if coupling_cache_dir is None:
            return

        self.__coupling_cache_dir = Path(coupling_cache_dir)
        index_path = self.__coupling_cache_dir / self.__HASH_TO_PATH
        try:
            # NOTE(security): unpickling executes arbitrary code, the cache
            # directory must be trusted.
            with index_path.open("rb") as index_file:
                self.__hash_to_spec_file_path = pickle.load(index_file)
        except FileNotFoundError:
            msg = (
                "No previous coupling graph disk cache found, the path %s will "
                "be used to store it."
            )
            LOGGER.info(msg, self.__coupling_cache_dir)
        else:
            msg = "Found coupling graph disk cache in %s"
            LOGGER.info(msg, self.__coupling_cache_dir)

    @staticmethod
    def __get_spec(
        disciplines: Iterable[MDODiscipline],
    ) -> tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...]:
        """Get the specs of the given disciplines.

        The result does not depend on the order of the disciplines.
        Disciplines sharing the same name are all kept.

        Args:
            disciplines: The disciplines to get the specs from.

        Returns:
            The disciplines' specs, i.e. one
            ``(name, sorted input names, sorted output names)`` tuple
            per discipline, sorted.
        """
        return tuple(
            sorted(
                (
                    disc.name,
                    tuple(sorted(disc.get_input_data_names())),
                    tuple(sorted(disc.get_output_data_names())),
                )
                for disc in disciplines
            )
        )

    def create(self, disciplines: Iterable[MDODiscipline]) -> DependencyGraph:
        """Create the :class:`.DependencyGraph` for the given disciplines.

        The graph is looked for in the memory cache first, then in the disk cache
        if any, and is built only as a last resort.

        Args:
            disciplines: The disciplines used to create the
                :class:`.DependencyGraph`.

        Returns:
            The :class:`.DependencyGraph` of the given disciplines.
        """
        # The disciplines are iterated twice (spec, then graph): materialize them
        # so that one-shot iterables such as generators are supported.
        disciplines = list(disciplines)
        spec = self.__get_spec(disciplines)
        hashed_spec = xxh3_64_hexdigest(bytes(str(spec), "utf-8"))

        graph = self.__get_graph_from_memory(spec, hashed_spec)
        if graph is None and self.__coupling_cache_dir is not None:
            graph = self.__get_graph_from_disk(spec, hashed_spec)
            if graph is not None:
                # Make the next request for this spec a memory hit.
                self.__hashes[hashed_spec] = spec

        if graph is not None:
            return graph

        # TODO: maybe pass the discipline ids and their inputs/outputs directly.
        graph = DependencyGraph(disciplines)
        self.instances[spec] = graph
        self.__hashes[hashed_spec] = spec

        if self.__coupling_cache_dir is not None:
            self.__save_to_disk(hashed_spec)

        return graph

    def __save_to_disk(self, hashed_spec: str) -> None:
        """Persist the memory cache and register it for the given hash.

        Args:
            hashed_spec: The hash of the spec whose graph has just been created.
        """
        cache_dir = self.__coupling_cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)

        file_name = self.__SPEC_TO_GRAPH.replace(".pkl", str(uuid4()) + ".pkl")

        # Write the graphs before the index, so that the index never refers to
        # a file that could not be written.
        with (cache_dir / file_name).open("wb") as spec_file:
            pickle.dump(self.instances, spec_file)

        self.__hash_to_spec_file_path[hashed_spec] = file_name
        with (cache_dir / self.__HASH_TO_PATH).open("wb") as index_file:
            pickle.dump(self.__hash_to_spec_file_path, index_file)

    def __get_graph_from_memory(
        self,
        spec: tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...],
        hashed_spec: str,
    ) -> DependencyGraph | None:
        """Load a dependency graph from memory.

        Args:
            spec: The spec that corresponds to the dependency graph to be recovered.
            hashed_spec: The hash of the spec.

        Returns:
            The dependency graph if available. Otherwise, return ``None``.
        """
        if hashed_spec not in self.__hashes:
            return None

        return self.instances.get(spec)

    def __get_graph_from_disk(
        self,
        spec: tuple[tuple[str, tuple[str, ...], tuple[str, ...]], ...],
        hashed_spec: str,
    ) -> DependencyGraph | None:
        """Load a dependency graph from disk.

        The graphs found in the pickle file are merged into the memory cache,
        without replacing the graphs that are already there.

        Args:
            spec: The spec that corresponds to the dependency graph to be recovered.
            hashed_spec: The hash of the spec.

        Returns:
            The dependency graph if available. Otherwise, return ``None``.
        """
        file_name = self.__hash_to_spec_file_path.get(hashed_spec)
        if file_name is None:
            return None

        file_path = self.__coupling_cache_dir / file_name
        try:
            # NOTE(security): unpickling executes arbitrary code, the cache
            # directory must be trusted.
            with file_path.open("rb") as spec_file:
                cached_instances = pickle.load(spec_file)
        except FileNotFoundError:
            # Stale index entry: behave as a cache miss so the graph is rebuilt.
            LOGGER.warning(
                "The coupling graph cache file %s is missing, "
                "the graph will be rebuilt.",
                file_path,
            )
            return None

        for cached_spec, cached_graph in cached_instances.items():
            self.instances.setdefault(cached_spec, cached_graph)

        return self.instances.get(spec)

    # TODO: maybe use a discipline proxy object exposing only what the factory
    # needs, e.g. DisciplineProxy(input_data_names, output_data_names, name) with
    # the methods get_input_data_names, get_output_data_names and
    # the attribute name.
# Pairs of discipline classes and whether their coupling graphs must match.
COUPLING_GRAPH_CASES = [
    ((Sellar1, Sellar2), (Sellar1, Sellar2), True),
    ((Sellar2, Sellar1), (Sellar1, Sellar2), True),
    ((Sellar1, Sellar2), (Sellar1, SellarSystem), False),
]


@pytest.mark.parametrize("classes_1,classes_2,expected_equal", COUPLING_GRAPH_CASES)
def test_coupling_graph_factory_from_memory(classes_1, classes_2, expected_equal):
    """Check that the memory cache hands back the very same graph object.

    Args:
        classes_1: A tuple of disciplines.
        classes_2: A tuple of disciplines.
        expected_equal: Whether the graphs of the disciplines are expected to be
            equal.
    """
    factory = CouplingGraphFactory()

    first_disciplines = create_disciplines_from_desc(classes_1)
    second_disciplines = create_disciplines_from_desc(classes_2)

    first_graph = factory.create(first_disciplines)
    second_graph = factory.create(second_disciplines)

    # A memory hit returns the cached instance itself, hence the identity check.
    same_object = second_graph is first_graph
    assert same_object == expected_equal


@pytest.mark.parametrize("classes_1,classes_2,expected_equal", COUPLING_GRAPH_CASES)
def test_coupling_graph_factory_from_disk(
    classes_1, classes_2, expected_equal, tmp_wd, caplog
):
    """Check that the disk cache gives back equivalent graphs.

    Args:
        classes_1: A tuple of disciplines.
        classes_2: A tuple of disciplines.
        expected_equal: Whether the graphs of the disciplines are expected to be
            equal.
        tmp_wd: Fixture to move into a temporary directory.
        caplog: Fixture to access and control log capturing.
    """
    factory = CouplingGraphFactory(coupling_cache_dir=tmp_wd)
    assert "No previous coupling graph disk cache found" in caplog.text

    first_disciplines = create_disciplines_from_desc(classes_1)
    second_disciplines = create_disciplines_from_desc(classes_2)

    first_graph = factory.create(first_disciplines)
    second_graph = factory.create(second_disciplines)

    # Graphs coming from disk are compared structurally rather than by identity.
    isomorphic = nx.is_isomorphic(
        first_graph._DependencyGraph__graph, second_graph._DependencyGraph__graph
    )
    assert isomorphic == expected_equal


def test_coupling_factory_existing_files(caplog):
    """Check that graphs pickled by a previous run are recovered from disk.

    Args:
        caplog: Fixture to access and control log capturing.
    """
    disk_factory = CouplingGraphFactory(coupling_cache_dir=DATA_PATH)
    assert f"Found coupling graph disk cache in {DATA_PATH}" in caplog.text

    memory_factory = CouplingGraphFactory()
    disciplines = [Sellar1(), Sellar2()]

    spec = memory_factory._CouplingGraphFactory__get_spec(disciplines)
    hashed_spec = xxh3_64_hexdigest(bytes(str(spec), "utf-8"))

    graph_from_disk = disk_factory._CouplingGraphFactory__get_graph_from_disk(
        spec, hashed_spec
    )
    fresh_graph = memory_factory.create(disciplines)

    assert graph_from_disk is not None
    assert nx.is_isomorphic(
        graph_from_disk._DependencyGraph__graph,
        fresh_graph._DependencyGraph__graph,
    )