From 7036855095363693ea8249698f36e2ba655d549d Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Thu, 5 Dec 2024 16:05:11 +0100 Subject: [PATCH 1/2] Dal/Node: resolve names of bootstrap peers regularly --- src/bin_dal_node/daemon.ml | 39 +++++++++++++++++++++------ src/lib_gossipsub/gossipsub_intf.ml | 8 +++--- src/lib_gossipsub/gossipsub_worker.ml | 11 +++++--- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/bin_dal_node/daemon.ml b/src/bin_dal_node/daemon.ml index 684a3e905c63..17f240053d90 100644 --- a/src/bin_dal_node/daemon.ml +++ b/src/bin_dal_node/daemon.ml @@ -1169,21 +1169,43 @@ let run ~data_dir ~configuration_override = return configuration in let*! () = Event.(emit configuration_loaded) () in - let cctxt = Rpc_context.make endpoint in let* dal_config = fetch_dal_config cctxt in - let points = points @ dal_config.bootstrap_peers in + let bootstrap_names = points @ dal_config.bootstrap_peers in let*! () = - if points = [] then Event.(emit config_error_no_bootstrap) () + if bootstrap_names = [] then Event.(emit config_error_no_bootstrap) () else Lwt.return_unit in (* Resolve: - [points] from DAL node config file and CLI. - [dal_config.bootstrap_peers] from the L1 network config. *) - let* points = resolve points in - let*! () = - if points = [] then Event.(emit resolved_bootstrap_no_points) () - else Event.(emit resolved_bootstrap_points (List.length points)) + (* Refresh the list of bootstrap points every 5 minutes *) + let* get_bootstrap_points = + let* current_points = resolve bootstrap_names in + let bootstrap_points = ref current_points in + let rec loop () = + catch_es + (fun () -> + let* current_points = resolve bootstrap_names in + let*! () = + if current_points = [] then + Event.(emit resolved_bootstrap_no_points) () + else + Event.( + emit resolved_bootstrap_points (List.length current_points)) + in + bootstrap_points := current_points ; + let*! () = Lwt_unix.sleep 300. 
in + loop ()) + ~catch_only:(function Lwt.Canceled -> true | _ -> false) + in + let dns_job = loop () in + let (_ : Lwt_exit.clean_up_callback_id) = + Lwt_exit.register_clean_up_callback ~loc:__LOC__ (fun _exit_status -> + let () = Lwt.cancel dns_job in + Lwt.return_unit) + in + return (fun () -> !bootstrap_points) in (* Create and start a GS worker *) let gs_worker = @@ -1227,7 +1249,7 @@ let run ~data_dir ~configuration_override = let gs_worker = Gossipsub.Worker.( make - ~bootstrap_points:points + ~bootstrap_points:get_bootstrap_points ~events_logging:Logging.event rng limits @@ -1236,6 +1258,7 @@ let run ~data_dir ~configuration_override = Gossipsub.Worker.start [] gs_worker ; gs_worker in + let points = get_bootstrap_points () in (* Create a transport (P2P) layer instance. *) let* transport_layer = let open Transport_layer_parameters in diff --git a/src/lib_gossipsub/gossipsub_intf.ml b/src/lib_gossipsub/gossipsub_intf.ml index 0c074149fcd0..082e2aa35a7d 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -1168,12 +1168,12 @@ module type WORKER = sig (** [make ~events_logging ~bootstrap_points rng limits parameters] initializes a new Gossipsub automaton with the given arguments. Then, it initializes and returns a worker for it. The [events_logging] function can be used to - define a handler for logging the worker's events. The list of - [bootstrap_points] represents the list of initially known peers' addresses - to which we may want to reconnect in the worker. *) + define a handler for logging the worker's events. [bootstrap_points] + is a function returning the current list of known peers' addresses to + which we may want to reconnect in the worker. 
*) val make : ?events_logging:(event -> unit Monad.t) -> - ?bootstrap_points:Point.t list -> + ?bootstrap_points:(unit -> Point.t list) -> Random.State.t -> (GS.Topic.t, GS.Peer.t, GS.Message_id.t, GS.span) limits -> (GS.Peer.t, GS.Message_id.t) parameters -> diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index d803c9b38ccd..125d01730ffd 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -267,7 +267,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : type worker_state = { stats : Introspection.stats; gossip_state : GS.state; - bootstrap_points : Point.Set.t; + bootstrap_points : unit -> Point.t list; trusted_peers : Peer.Set.t; connected_bootstrap_peers : Peer.Set.t; events_stream : event Stream.t; @@ -650,9 +650,12 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : |> emit_p2p_output state ~mk_output:(fun trusted_peer -> Connect {peer = trusted_peer; origin = Trusted}) ; let p2p_output_stream = state.p2p_output_stream in + let bootstrap_points = + state.bootstrap_points () |> Point.Set.of_list + in Point.Set.iter (fun point -> Stream.push (Connect_point {point}) p2p_output_stream) - state.bootstrap_points) ; + bootstrap_points) ; state let update_gossip_state state (gossip_state, output) = @@ -871,12 +874,12 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : event_loop_promise let make ?(events_logging = fun _event -> Monad.return ()) - ?(bootstrap_points = []) rng limits parameters = + ?(bootstrap_points = fun () -> []) rng limits parameters = { status = Starting; state = { - bootstrap_points = Point.Set.of_list bootstrap_points; + bootstrap_points; stats = Introspection.empty_stats (); gossip_state = GS.make rng limits parameters; trusted_peers = Peer.Set.empty; -- GitLab From 3815cffb759a0382b71d028f184bcff6da2eaa7c Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Fri, 6 Dec 2024 15:54:32 +0100 Subject: [PATCH 2/2] Dal/Node: dns refresh delay in 
constants --- src/bin_dal_node/constants.ml | 3 +++ src/bin_dal_node/constants.mli | 3 +++ src/bin_dal_node/daemon.ml | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bin_dal_node/constants.ml b/src/bin_dal_node/constants.ml index 8158207892ed..f64a9f254951 100644 --- a/src/bin_dal_node/constants.ml +++ b/src/bin_dal_node/constants.ml @@ -89,3 +89,6 @@ let crawler_retries_on_disconnection = 5 disconnection error is encountered while retrieving data from L1 outside the {!Layer1.iter_heads} callback. *) let crawler_re_processing_delay = 5. + +(* Delay in seconds between refreshes of the IPs of the bootstrap DNS names. *) +let bootstrap_dns_refresh_delay = 300. diff --git a/src/bin_dal_node/constants.mli b/src/bin_dal_node/constants.mli index 3bbb214ca632..78e12e6de696 100644 --- a/src/bin_dal_node/constants.mli +++ b/src/bin_dal_node/constants.mli @@ -78,3 +78,6 @@ val crawler_retries_on_disconnection : int disconnection error is encountered while retrieving data from L1 outside the {!Layer1.iter_heads} callback. *) val crawler_re_processing_delay : float + +(** Delay in seconds between refreshes of the IPs of the bootstrap DNS names. *) +val bootstrap_dns_refresh_delay : float diff --git a/src/bin_dal_node/daemon.ml b/src/bin_dal_node/daemon.ml index 17f240053d90..9e50d202604d 100644 --- a/src/bin_dal_node/daemon.ml +++ b/src/bin_dal_node/daemon.ml @@ -1195,7 +1195,7 @@ let run ~data_dir ~configuration_override = emit resolved_bootstrap_points (List.length current_points)) in bootstrap_points := current_points ; - let*! () = Lwt_unix.sleep 300. in + let*! () = Lwt_unix.sleep Constants.bootstrap_dns_refresh_delay in loop ()) ~catch_only:(function Lwt.Canceled -> true | _ -> false) in -- GitLab