From 596cc9fe06759559839ae7293132106d82b25fbf Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Tue, 12 Dec 2023 10:52:43 +0100 Subject: [PATCH 1/6] Tezt/Dal: improve peers connection/discovery test --- tezt/tests/dal.ml | 97 +++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 42 deletions(-) diff --git a/tezt/tests/dal.ml b/tezt/tests/dal.ml index ffe7242eea9b..cae55f6dc9bf 100644 --- a/tezt/tests/dal.ml +++ b/tezt/tests/dal.ml @@ -3597,30 +3597,13 @@ let test_baker_registers_profiles protocol _parameters _cryptobox l1_node client let* () = Lwt_unix.sleep 2.0 in check_profiles ~__LOC__ dal_node ~expected:(Operator profiles) -(** Tests that a peer can discover another peer via a bootstrap node. - - There are three nodes in the test: - - dal_node1: The bootstrap node. - - dal_node2: An attester for pkh of boostrap1. - - dal_node3: A slot producer for slot index 0. - - [dal_node2] should connect to [dal_node1] at startup. - [dal_node3] should also connect to [dal_node1] at startup. - [dal_node2] and [dal_node3] should find each other via [dal_node1]. *) -let connect_nodes_via_bootstrap_node _protocol _parameters _cryptobox node - client dal_node1 = - let* dal_node2 = - make_dal_node - ~peers:[Dal_node.listen_addr dal_node1] - ~attester_profiles:[Constant.bootstrap1.public_key_hash] - node - in - let* dal_node3 = - make_dal_node - ~peers:[Dal_node.listen_addr dal_node1] - ~producer_profiles:[0] - node - in +(** This helper funciton terminates dal_node2 and dal_node3 (in addition to + those in [extra_nodes_to_restart]), and restart them after creating two + connection events to check that dal_node2 and dal_node3 find each other. *) +let observe_nodes_connection_via_bootstrap ?(extra_nodes_to_restart = []) client + dal_node2 dal_node3 = + let nodes = dal_node2 :: dal_node3 :: extra_nodes_to_restart in + let* () = List.map Dal_node.terminate nodes |> Lwt.join in let check_conn_event_from_2_to_3 = check_new_connection_event ~main_node:dal_node2 @@ -3633,6 +3616,7 @@ let connect_nodes_via_bootstrap_node _protocol _parameters _cryptobox node ~other_node:dal_node2 ~is_trusted:false in + let* () = List.map (Dal_node.run ~wait_ready:true) nodes |> Lwt.join in Log.info "Bake two times to finalize a block." ; let* () = Client.bake_for_and_wait client in let* () = Client.bake_for_and_wait client in @@ -3641,37 +3625,66 @@ let connect_nodes_via_bootstrap_node _protocol _parameters _cryptobox node let* () = Lwt.join [check_conn_event_from_2_to_3; check_conn_event_from_3_to_2] in - Lwt.return (dal_node2, dal_node3) + unit -(** See {!connect_nodes_via_bootstrap_node} for the doc. *) +(** This function tests that a peer can discover another peer via a bootstrap + node and that discovery works even when the bootstrap is (re-)started at the + same time (or after) the two other nodes we want to connect. *) let test_peer_discovery_via_bootstrap_node _protocol _parameters _cryptobox node client dal_node1 = - let* _dal_node2, _dal_node3 = - connect_nodes_via_bootstrap_node - _protocol - _parameters - _cryptobox + (* Phase 1: dal_node1 is already running. Start dal_node2 and dal_node3 and + use dal_node1 to establish connections between them. *) + let* dal_node2 = + make_dal_node + ~peers:[Dal_node.listen_addr dal_node1] + ~attester_profiles:[Constant.bootstrap1.public_key_hash] node - client - dal_node1 in - unit + let* dal_node3 = + make_dal_node + ~peers:[Dal_node.listen_addr dal_node1] + ~producer_profiles:[0] + node + in + (* Here, we observe a first nodes connection via bootstrap nodes thanks to + peers exchange. *) + let* () = observe_nodes_connection_via_bootstrap client dal_node2 dal_node3 in + + (* In this variant, we also restart the bootstrap node [dal_node1]. So, + connections to it from dal_node2 and dal_node3 are always done at startup, + but Gossipsub worker might be needed to retry connection. *) + observe_nodes_connection_via_bootstrap + ~extra_nodes_to_restart:[dal_node1] + client + dal_node2 + dal_node3 (** Connect two nodes [dal_node2] and [dal_node3] via a trusted bootstrap peer - [dal_node1]. Then, disconnect all the nodes and wait for reconnection. *) + [dal_node1]. Then, disconnect all the nodes (without restarting them) and + wait for reconnection. *) let test_peers_reconnection _protocol _parameters _cryptobox node client dal_node1 = (* Connect two nodes via bootstrap peer. *) Log.info "Connect two nodes via bootstrap peer." ; - let* dal_node2, dal_node3 = - connect_nodes_via_bootstrap_node - _protocol - _parameters - _cryptobox + let* dal_node2 = + make_dal_node + ~peers:[Dal_node.listen_addr dal_node1] + ~attester_profiles:[Constant.bootstrap1.public_key_hash] node - client - dal_node1 in + let* dal_node3 = + make_dal_node + ~peers:[Dal_node.listen_addr dal_node1] + ~producer_profiles:[0] + node + in + let* () = + (* Here, we observe a first nodes connection via bootstrap nodes thanks to + peers exchange. Below, we disconnect the nodes without restarting + them. *) + observe_nodes_connection_via_bootstrap client dal_node2 dal_node3 + in + (* Get the nodes' identities. *) let id dal_node = JSON.(Dal_node.read_identity dal_node |-> "peer_id" |> as_string) -- GitLab From 6b86a6d6b27c4f6c1c7ae6bb795d71099e9ce0b9 Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Tue, 12 Dec 2023 13:12:49 +0100 Subject: [PATCH 2/6] Dal: add the notion of Point in the Gossipsub worker --- src/lib_dal_node/gossipsub/gs_interface.ml | 1 + src/lib_dal_node_services/types.ml | 18 ++++++++++++++++++ src/lib_dal_node_services/types.mli | 17 +++++++++++++++++ src/lib_gossipsub/gossipsub_intf.ml | 2 ++ src/lib_gossipsub/gossipsub_worker.ml | 4 +++- .../test/test_gossipsub_shared.ml | 19 ++++++++++--------- 6 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/lib_dal_node/gossipsub/gs_interface.ml b/src/lib_dal_node/gossipsub/gs_interface.ml index 8a52de0adb0a..4d0c2b960853 100644 --- a/src/lib_dal_node/gossipsub/gs_interface.ml +++ b/src/lib_dal_node/gossipsub/gs_interface.ml @@ -89,6 +89,7 @@ module Worker_config : and type 'a Monad.t = 'a Lwt.t = struct module GS = Tezos_gossipsub.Automaton (Automaton_config) module Monad = Monad + module Point = Types.Point (* TODO: https://gitlab.com/tezos/tezos/-/issues/5596 diff --git a/src/lib_dal_node_services/types.ml b/src/lib_dal_node_services/types.ml index 558d7c2669c9..60cde0082e5a 100644 --- a/src/lib_dal_node_services/types.ml +++ b/src/lib_dal_node_services/types.ml @@ -191,6 +191,24 @@ module Peer = struct let encoding = P2p_peer.Id.encoding end +module Point = struct + type t = P2p_point.Id.t + + module Cmp = struct + type nonrec t = t + + let compare p1 p2 = P2p_point.Id.compare p1 p2 + end + + include Compare.Make (Cmp) + module Set = Set.Make (Cmp) + module Map = Map.Make (Cmp) + + let pp = P2p_point.Id.pp + + let encoding = P2p_point.Id.encoding +end + let get_value ~__LOC__ func = Option.value_f ~default:(fun () -> Stdlib.failwith diff --git a/src/lib_dal_node_services/types.mli b/src/lib_dal_node_services/types.mli index 9ac1275f09fb..b44f00d471d1 100644 --- a/src/lib_dal_node_services/types.mli +++ b/src/lib_dal_node_services/types.mli @@ -118,6 +118,23 @@ module Peer : sig module Map : Map.S with type key = t end +(** A point is made of an IP address and a port. Only the worker knows about + the notion. The automaton only sees peers (i.e. cryptographic identities of + nodes). *) +module Point : sig + type t = P2p_point.Id.t + + include PRINTABLE with type t := t + + include ENCODABLE with type t := t + + include COMPARABLE with type t := t + + module Set : Set.S with type elt = t + + module Map : Map.S with type key = t +end + module Span : sig type t = Ptime.Span.t diff --git a/src/lib_gossipsub/gossipsub_intf.ml b/src/lib_gossipsub/gossipsub_intf.ml index d410fbff252c..9d3afd4fb018 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -1027,6 +1027,8 @@ module type WORKER_CONFIGURATION = sig (** The gossipsub automaton that will be used by the worker. *) module GS : AUTOMATON + module Point : ITERABLE + (** Abstraction of the IO monad used by the worker. *) module Monad : sig (** The monad type. *) diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 4536047734c0..2f2beea87a7c 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -32,7 +32,8 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : Gossipsub_intf.WORKER with module GS = C.GS and module Monad = C.Monad - and module Stream = C.Stream = struct + and module Stream = C.Stream + and module Point = C.Point = struct module GS = C.GS module Monad = C.Monad module Stream = C.Stream @@ -41,6 +42,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : module Peer = GS.Peer module Message_id = GS.Message_id module Message = GS.Message + module Point = C.Point module Introspection = struct type stats = { diff --git a/src/lib_gossipsub/test/test_gossipsub_shared.ml b/src/lib_gossipsub/test/test_gossipsub_shared.ml index 78f69a43ae14..a226fb6302e3 100644 --- a/src/lib_gossipsub/test/test_gossipsub_shared.ml +++ b/src/lib_gossipsub/test/test_gossipsub_shared.ml @@ -117,6 +117,15 @@ module Validity_hook = struct let apply msg msg_id = !validity msg msg_id end +module Int_iterable = struct + include Compare.Int + + let pp = Format.pp_print_int + + module Map = Map.Make (Int) + module Set = Set.Make (Int) +end + module Automaton_config : AUTOMATON_CONFIG with type Time.t = Milliseconds.t @@ -133,15 +142,6 @@ module Automaton_config : let now = Time.now end - module Int_iterable = struct - include Compare.Int - - let pp = Format.pp_print_int - - module Map = Map.Make (Int) - module Set = Set.Make (Int) - end - module String_iterable = struct include Compare.String @@ -330,6 +330,7 @@ let pp_limits fmtr (** Instantiate the worker functor *) module Worker_config = struct module GS = GS + module Point = Int_iterable module Monad = struct type 'a t = 'a Lwt.t -- GitLab From 9ac96cdbce60d3ed0ac5c1afceb754db5a1a6698 Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Tue, 12 Dec 2023 14:09:22 +0100 Subject: [PATCH 3/6] DAL: provide a list of bootstrap points to the GS worker --- src/bin_dal_node/daemon.ml | 28 +++++++++++++-------- src/lib_dal_node/gossipsub/gossipsub.mli | 2 ++ src/lib_dal_node/gossipsub/gs_interface.ml | 3 ++- src/lib_dal_node/gossipsub/gs_interface.mli | 2 ++ src/lib_gossipsub/gossipsub_intf.ml | 11 +++++--- src/lib_gossipsub/gossipsub_worker.ml | 6 +++-- src/lib_gossipsub/gossipsub_worker.mli | 1 + src/lib_gossipsub/tezos_gossipsub.mli | 1 + 8 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/bin_dal_node/daemon.ml b/src/bin_dal_node/daemon.ml index 481de7dfe2a7..2503b9667ab7 100644 --- a/src/bin_dal_node/daemon.ml +++ b/src/bin_dal_node/daemon.ml @@ -505,12 +505,12 @@ let connect_gossipsub_with_p2p gs_worker transport_layer node_store = ^ Printexc.to_string exn |> Stdlib.failwith) -let resolve peers = +let resolve points = List.concat_map_es (Tezos_base_unix.P2p_resolve.resolve_addr ~default_addr:"::" ~default_port:(Configuration_file.default.listen_addr |> snd)) - peers + points (* This function ensures the persistence of attester profiles to the configuration file at shutdown. @@ -556,7 +556,10 @@ let run ~data_dir configuration_override = let* ({ network_name; rpc_addr; - peers; + (* These are not the cryptographic identities of peers, but the points + (IP addresses + ports) of the nodes we want to connect to at + startup. *) + peers = points; endpoint; profiles; listen_addr; @@ -574,6 +577,12 @@ let run ~data_dir configuration_override = return configuration in let*! () = Event.(emit configuration_loaded) () in + let cctxt = Rpc_context.make endpoint in + let* dal_config = fetch_dal_config cctxt in + (* Resolve: + - [points] from DAL node config file and CLI. + - [dal_config.bootstrap_peers] from the L1 network config. *) + let* points = resolve (points @ dal_config.bootstrap_peers) in (* Create and start a GS worker *) let gs_worker = let rng = @@ -607,7 +616,12 @@ let run ~data_dir configuration_override = in let gs_worker = Gossipsub.Worker.( - make ~events_logging:Logging.event rng limits peer_filter_parameters) + make + ~bootstrap_points:points + ~events_logging:Logging.event + rng + limits + peer_filter_parameters) in Gossipsub.Worker.start [] gs_worker ; gs_worker @@ -624,7 +638,6 @@ let run ~data_dir configuration_override = ~network_name in let* store = Store.init config in - let cctxt = Rpc_context.make endpoint in let*! metrics_server = Metrics.launch config.metrics_addr in let ctxt = Node_context.init @@ -640,11 +653,6 @@ let run ~data_dir configuration_override = in let* rpc_server = RPC_server.(start config ctxt) in connect_gossipsub_with_p2p gs_worker transport_layer store ; - let* dal_config = fetch_dal_config cctxt in - (* Resolve: - - [peers] from DAL node config file and CLI. - - [dal_config.bootstrap_peers] from the L1 network config. *) - let* points = resolve (peers @ dal_config.bootstrap_peers) in (* activate the p2p instance. *) let*! () = Gossipsub.Transport_layer.activate ~additional_points:points transport_layer diff --git a/src/lib_dal_node/gossipsub/gossipsub.mli b/src/lib_dal_node/gossipsub/gossipsub.mli index dbe18274be23..292a8498a7c1 100644 --- a/src/lib_dal_node/gossipsub/gossipsub.mli +++ b/src/lib_dal_node/gossipsub/gossipsub.mli @@ -41,6 +41,7 @@ module Worker : sig and type GS.Span.t = Types.Span.t and type GS.Time.t = Types.Time.t and type 'a Monad.t = 'a Lwt.t + and type Point.t = Types.Point.t module Default_parameters : module type of Gs_default_parameters @@ -52,6 +53,7 @@ module Worker : sig and type GS.Peer.t = Types.Peer.t and type GS.Span.t = Types.Span.t and type GS.Time.t = Types.Time.t + and type Point.t = Types.Point.t module Logging : sig val event : event -> unit Monad.t diff --git a/src/lib_dal_node/gossipsub/gs_interface.ml b/src/lib_dal_node/gossipsub/gs_interface.ml index 4d0c2b960853..7fbffd1fdd9a 100644 --- a/src/lib_dal_node/gossipsub/gs_interface.ml +++ b/src/lib_dal_node/gossipsub/gs_interface.ml @@ -86,7 +86,8 @@ module Worker_config : and type GS.Peer.t = Types.Peer.t and type GS.Span.t = Types.Span.t and type GS.Time.t = Types.Time.t - and type 'a Monad.t = 'a Lwt.t = struct + and type 'a Monad.t = 'a Lwt.t + and type Point.t = Types.Point.t = struct module GS = Tezos_gossipsub.Automaton (Automaton_config) module Monad = Monad module Point = Types.Point diff --git a/src/lib_dal_node/gossipsub/gs_interface.mli b/src/lib_dal_node/gossipsub/gs_interface.mli index 61504e23d1f2..7a66f419f2e6 100644 --- a/src/lib_dal_node/gossipsub/gs_interface.mli +++ b/src/lib_dal_node/gossipsub/gs_interface.mli @@ -36,6 +36,7 @@ module Worker_config : and type GS.Span.t = Types.Span.t and type GS.Time.t = Types.Time.t and type 'a Monad.t = 'a Lwt.t + and type Point.t = Types.Point.t module Worker_instance : Gossipsub_intf.WORKER @@ -46,6 +47,7 @@ module Worker_instance : and type GS.Span.t = Types.Span.t and type GS.Time.t = Types.Time.t and type 'a Monad.t = 'a Lwt.t + and type Point.t = Types.Point.t module Validate_message_hook : sig val set : diff --git a/src/lib_gossipsub/gossipsub_intf.ml b/src/lib_gossipsub/gossipsub_intf.ml index 9d3afd4fb018..bceaa8d941a4 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -1144,12 +1144,15 @@ module type WORKER = sig | P2P_input of p2p_input | App_input of app_input - (** [make ~events_logging rng limits parameters] initializes a new Gossipsub - automaton with the given arguments. Then, it initializes and returns a - worker for it. The [events_logging] function can be used to define a - handler for logging the worker's events. *) + (** [make ~events_logging ~bootstrap_points rng limits parameters] initializes + a new Gossipsub automaton with the given arguments. Then, it initializes + and returns a worker for it. The [events_logging] function can be used to + define a handler for logging the worker's events. The list of + [bootstrap_points] represents the list of initially known peers' addresses + to which we may want to reconnect in the worker. *) val make : ?events_logging:(event -> unit Monad.t) -> + ?bootstrap_points:Point.t list -> Random.State.t -> (GS.Topic.t, GS.Peer.t, GS.Message_id.t, GS.span) limits -> (GS.Peer.t, GS.Message_id.t) parameters -> diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 2f2beea87a7c..27cd6bf7742d 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -217,6 +217,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : type worker_state = { stats : Introspection.stats; gossip_state : GS.state; + bootstrap_points : Point.Set.t; trusted_peers : Peer.Set.t; connected_bootstrap_peers : Peer.Set.t; events_stream : event Stream.t; @@ -763,12 +764,13 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : resolved. *) event_loop_promise - let make ?(events_logging = fun _event -> Monad.return ()) rng limits - parameters = + let make ?(events_logging = fun _event -> Monad.return ()) + ?(bootstrap_points = []) rng limits parameters = { status = Starting; state = { + bootstrap_points = Point.Set.of_list bootstrap_points; stats = Introspection.empty_stats (); gossip_state = GS.make rng limits parameters; trusted_peers = Peer.Set.empty; diff --git a/src/lib_gossipsub/gossipsub_worker.mli b/src/lib_gossipsub/gossipsub_worker.mli index 5437de79b3f2..16c7c5bab1d9 100644 --- a/src/lib_gossipsub/gossipsub_worker.mli +++ b/src/lib_gossipsub/gossipsub_worker.mli @@ -31,3 +31,4 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : and module GS.Topic = C.GS.Topic and module Monad = C.Monad and module Stream = C.Stream + and module Point = C.Point diff --git a/src/lib_gossipsub/tezos_gossipsub.mli b/src/lib_gossipsub/tezos_gossipsub.mli index 07a8cb32201e..cd8032a6f804 100644 --- a/src/lib_gossipsub/tezos_gossipsub.mli +++ b/src/lib_gossipsub/tezos_gossipsub.mli @@ -41,3 +41,4 @@ module Worker (C : Gossipsub_intf.WORKER_CONFIGURATION) : and module GS.Peer = C.GS.Peer and module Monad = C.Monad and module Stream = C.Stream + and module Point = C.Point -- GitLab From cb073df87f81c407f3065a9dd8ce44272c6af1eb Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Tue, 12 Dec 2023 16:51:47 +0100 Subject: [PATCH 4/6] Dal/GS: add necessary code to reconnect to bootstrap points --- src/lib_dal_node/gossipsub/gs_transport_connection.ml | 3 +++ src/lib_gossipsub/gossipsub_intf.ml | 5 +++++ src/lib_gossipsub/gossipsub_worker.ml | 8 ++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/lib_dal_node/gossipsub/gs_transport_connection.ml b/src/lib_dal_node/gossipsub/gs_transport_connection.ml index 2ad074e7562b..1448bb1b6d54 100644 --- a/src/lib_dal_node/gossipsub/gs_transport_connection.ml +++ b/src/lib_dal_node/gossipsub/gs_transport_connection.ml @@ -351,6 +351,9 @@ let gs_worker_p2p_output_handler gs_worker p2p_layer px_cache = (P2p.disconnect ~reason:"disconnected by Gossipsub" p2p_layer) | Connect {peer; origin} -> try_connect p2p_layer px_cache ~px_peer:peer ~origin + | Connect_point {point} -> + let* (_ : _ P2p.connection tzresult) = P2p.connect p2p_layer point in + return_unit | Forget {peer; origin} -> PX_cache.drop px_cache ~px_peer:peer ~origin:(PX origin) ; return_unit diff --git a/src/lib_gossipsub/gossipsub_intf.ml b/src/lib_gossipsub/gossipsub_intf.ml index bceaa8d941a4..42bbd0ae4902 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -1130,6 +1130,11 @@ module type WORKER = sig | Connect of {peer : GS.Peer.t; origin : peer_origin} (** Inform the p2p_output messages processor that we want to connect to the peer [peer] advertised by some other peer [origin]. *) + | Connect_point of {point : Point.t} + (** Version of connect where we provide a point directly. *) + (* TODO: https://gitlab.com/tezos/tezos/-/issues/6741 + + Unify the two Connect versions. Have the peers cache in the worker. *) | Forget of {peer : GS.Peer.t; origin : GS.Peer.t} (** Inform the p2p_output messages processor that we don't want to connect to the peer [peer] advertised by some other peer [origin]. *) diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 27cd6bf7742d..96d1f5295722 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -204,6 +204,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : | Disconnect of {peer : Peer.t} | Kick of {peer : Peer.t} | Connect of {peer : Peer.t; origin : peer_origin} + | Connect_point of {point : Point.t} | Forget of {peer : Peer.t; origin : Peer.t} type app_output = message_with_header @@ -244,7 +245,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : | Message_with_header _ -> Introspection.update_count_sent_app_messages stats `Incr | Subscribe _ | Unsubscribe _ -> ()) - | Connect _ | Disconnect _ | Forget _ | Kick _ -> () + | Connect _ | Connect_point _ | Disconnect _ | Forget _ | Kick _ -> () in fun {connected_bootstrap_peers; p2p_output_stream; stats; _} ~mk_output -> let maybe_emit to_peer = @@ -260,7 +261,8 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : IWant if the remote peer has a bootstrap profile. *) false | Graft _ | Prune _ | Subscribe _ | Unsubscribe _ -> true) - | Connect _ | Disconnect _ | Forget _ | Kick _ -> true + | Connect _ | Connect_point _ | Disconnect _ | Forget _ | Kick _ -> + true in if do_emit then ( update_sent_stats stats message ; @@ -854,6 +856,8 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : | PX peer -> Peer.pp fmt peer | Trusted -> Format.fprintf fmt "(trusted)") origin + | Connect_point {point} -> + Format.fprintf fmt "Connect_point{point=%a}" Point.pp point | Forget {peer; origin} -> Format.fprintf fmt -- GitLab From 3c4931f862a346683759c7584792eb5181100de6 Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Tue, 12 Dec 2023 17:00:32 +0100 Subject: [PATCH 5/6] Dal/GS: (also) attempt re-connection to bootstrap points --- src/lib_gossipsub/gossipsub_worker.ml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 96d1f5295722..84631f462fae 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -576,8 +576,8 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : (IHave {topic; message_ids}) (Seq.return peer)) ; (* Once every 15 hearbreat ticks, try to reconnect to trusted peers if - they are disconnected. *) - if Int64.(equal (rem gstate_view.heartbeat_ticks 15L) 0L) then + they are disconnected. Also try to reconnect to bootstrap points. *) + if Int64.(equal (rem gstate_view.heartbeat_ticks 15L) 0L) then ( (* TODO: https://gitlab.com/tezos/tezos/-/issues/6636 Put the value [15L] as a parameter. *) @@ -590,6 +590,10 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : Seq.empty |> emit_p2p_output state ~mk_output:(fun trusted_peer -> Connect {peer = trusted_peer; origin = Trusted}) ; + let p2p_output_stream = state.p2p_output_stream in + Point.Set.iter + (fun point -> Stream.push (Connect_point {point}) p2p_output_stream) + state.bootstrap_points) ; state let update_gossip_state state (gossip_state, output) = -- GitLab From f2ce43f52650fa44d14bb842cdcdca94064bf31a Mon Sep 17 00:00:00 2001 From: "iguerNL@Functori" Date: Mon, 18 Dec 2023 17:33:01 +0100 Subject: [PATCH 6/6] Dal/P2P: only try to connect to a point if no connection is present --- .../gossipsub/gs_transport_connection.ml | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/lib_dal_node/gossipsub/gs_transport_connection.ml b/src/lib_dal_node/gossipsub/gs_transport_connection.ml index 1448bb1b6d54..448477f8b310 100644 --- a/src/lib_dal_node/gossipsub/gs_transport_connection.ml +++ b/src/lib_dal_node/gossipsub/gs_transport_connection.ml @@ -280,6 +280,19 @@ let unwrap_p2p_message p2p_layer ~from_peer px_cache = | I.Message_with_header {message; topic; message_id} -> Message_with_header {message; topic; message_id} +let try_connect_point ?expected_peer_id p2p_layer point = + let open Lwt_syntax in + match P2p.pool p2p_layer with + | None -> return_unit + | Some pool -> + if Option.is_some @@ P2p_pool.Connection.find_by_point pool point then + return_unit (* already connected. *) + else + let* (_ : _ P2p.connection tzresult) = + P2p.connect ?expected_peer_id p2p_layer point + in + return_unit + let try_connect p2p_layer px_cache ~px_peer ~origin = let open Lwt_syntax in (* If there is some [point] associated to [px_peer] and advertised by [origin] @@ -306,9 +319,7 @@ let try_connect p2p_layer px_cache ~px_peer ~origin = This implementation will be hardened once we add the notion of "signed records" found, e.g., in Rust version, to check that the advertised (peer, point) pair alongside a timestamp are not faked. *) - let* (_ : _ P2p.connection tzresult) = - P2p.connect ~expected_peer_id:px_peer p2p_layer point - in + let* () = try_connect_point ~expected_peer_id:px_peer p2p_layer point in (match origin with | Trusted -> () (* Don't drop trusted points. *) | PX _ -> PX_cache.drop px_cache ~px_peer ~origin) ; @@ -351,9 +362,7 @@ let gs_worker_p2p_output_handler gs_worker p2p_layer px_cache = (P2p.disconnect ~reason:"disconnected by Gossipsub" p2p_layer) | Connect {peer; origin} -> try_connect p2p_layer px_cache ~px_peer:peer ~origin - | Connect_point {point} -> - let* (_ : _ P2p.connection tzresult) = P2p.connect p2p_layer point in - return_unit + | Connect_point {point} -> try_connect_point p2p_layer point | Forget {peer; origin} -> PX_cache.drop px_cache ~px_peer:peer ~origin:(PX origin) ; return_unit -- GitLab