From 5a6ed95b7688a4d9d292425a1bd98e270ed9ff64 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 18 Mar 2025 11:41:56 +0000 Subject: [PATCH 1/8] Agnostic_baker: Remove unnecessary get_current_proposal function --- src/lib_agnostic_baker/daemon.ml | 1 - src/lib_agnostic_baker/rpc_services.ml | 13 ------------- src/lib_agnostic_baker/rpc_services.mli | 5 ----- 3 files changed, 19 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 139009817625..b0308d271967 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -265,7 +265,6 @@ let run state = Lwt.return_unit) in let* () = may_start_initial_baker state in - let* _protocol_proposal = Rpc_services.get_current_proposal ~node_addr in let* head_stream = monitor_heads ~node_addr in (* Monitoring voting periods through heads monitoring to avoid missing UAUs. *) diff --git a/src/lib_agnostic_baker/rpc_services.ml b/src/lib_agnostic_baker/rpc_services.ml index 725f47615932..2f7c83582c29 100644 --- a/src/lib_agnostic_baker/rpc_services.ml +++ b/src/lib_agnostic_baker/rpc_services.ml @@ -62,19 +62,6 @@ let get_next_protocol_hash ~node_addr = let uri = Format.sprintf "%s/chains/main/blocks/head/metadata" node_addr in call_and_wrap_rpc ~node_addr ~uri ~f -let get_current_proposal ~node_addr = - let open Lwt_result_syntax in - let f json = - match json with - | `Null -> return_none - | `String s -> return_some @@ Protocol_hash.of_b58check_exn s - | _ -> tzfail (Cannot_decode_node_data "not an object") - in - let uri = - Format.sprintf "%s/chains/main/blocks/head/votes/current_proposal" node_addr - in - call_and_wrap_rpc ~node_addr ~uri ~f - let get_current_period ~node_addr = let open Lwt_result_syntax in let voting_period_field = "voting_period" in diff --git a/src/lib_agnostic_baker/rpc_services.mli b/src/lib_agnostic_baker/rpc_services.mli index bdf44fb647fb..dd3de07a0818 100644 --- a/src/lib_agnostic_baker/rpc_services.mli +++ b/src/lib_agnostic_baker/rpc_services.mli @@ -18,11 +18,6 @@ val request_uri : block. *) val get_next_protocol_hash : node_addr:string -> Protocol_hash.t tzresult Lwt.t -(** [get_current_proposal ~node_addr] returns the protocol hash of - the current voting period, if any. *) -val get_current_proposal : - node_addr:string -> Protocol_hash.t option tzresult Lwt.t - (** [get_current_period ~node_addr] returns the current voting period in addition to the number of remaining blocks until the end of the period. *) -- GitLab From c43f831b5e2d8bdc6e8dbfdf8c07d0a698aa71ae Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 18 Mar 2025 11:40:26 +0000 Subject: [PATCH 2/8] Agnostic_baker: Keep old baker for a safe number of levels after migration --- .../agnostic_baker_events.ml | 12 +++++ src/lib_agnostic_baker/daemon.ml | 52 +++++++++++++++---- src/lib_agnostic_baker/rpc_services.ml | 21 ++++++++ src/lib_agnostic_baker/rpc_services.mli | 3 ++ 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/lib_agnostic_baker/agnostic_baker_events.ml b/src/lib_agnostic_baker/agnostic_baker_events.ml index cdb6585bf353..4a591d8ffb79 100644 --- a/src/lib_agnostic_baker/agnostic_baker_events.ml +++ b/src/lib_agnostic_baker/agnostic_baker_events.ml @@ -70,6 +70,18 @@ let protocol_encountered = ("proto_hash", Protocol_hash.encoding) ~pp2:Protocol_hash.pp_short +let become_old_baker = + declare_2 + ~section + ~level:Notice + ~name:"become_old_baker" + ~msg: + "The old baker for protocol {proto_hash} will be shut down at level \ + {level_to_kill}." + ("proto_hash", Protocol_hash.encoding) + ~pp1:Protocol_hash.pp_short + ("level_to_kill", int31) + let waiting_for_active_protocol = declare_0 ~section diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index b0308d271967..fd4c12bd21a6 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -8,6 +8,11 @@ open Agnostic_baker_errors +(* Number of extra levels to keep the old baker alive before shutting it down. + This extra time is used to avoid halting the chain in cases such as + reorganization or high round migration blocks. *) +let extra_levels_for_old_baker = 3 + type process = {thread : int Lwt.t; canceller : int Lwt.u} type baker = {protocol_hash : Protocol_hash.t; process : process} @@ -95,6 +100,7 @@ type 'a state = { node_endpoint : string; baker_args : string list; mutable current_baker : baker option; + mutable old_baker : (baker * int) option; } type 'a t = 'a state @@ -126,11 +132,15 @@ let monitor_heads ~node_addr = ignore (loop () : unit Lwt.t) ; return stream -(** [hot_swap_baker ~state ~next_protocol_hash] performs a swap in the current - [~state] of the agnostic baker, exchanging the current baker with the one - corresponding to [~next_protocol_hash]. This is done by stopping the - current baking process and spawning a new process instead. *) -let hot_swap_baker ~state ~next_protocol_hash = +(** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash + ~level_to_kill_old_baker] performs a swap in the current [~state] of the + agnostic baker, exchanging the current baker from [~current_protocol_hash] + with the one corresponding to [~next_protocol_hash]. The current baker is kept + running for a few more levels after the migration (as dictated by + [~level_to_kill_old_baker]), for safety purposes (such as reorganisations after + the migration, or high round blocks). *) +let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash + ~level_to_kill_old_baker = let open Lwt_result_syntax in let* current_baker = match state.current_baker with @@ -142,8 +152,13 @@ let hot_swap_baker ~state ~next_protocol_hash = Agnostic_baker_events.(emit protocol_encountered) (next_proto_status, next_protocol_hash) in - (* Shutdown previous baker *) - let*! () = shutdown current_baker in + (* Current baker is moved to old baker, which will be killed after a safe + number of levels. *) + let*! () = + Agnostic_baker_events.(emit become_old_baker) + (current_protocol_hash, level_to_kill_old_baker) + in + state.old_baker <- Some (current_baker, level_to_kill_old_baker) ; state.current_baker <- None ; let* new_baker = spawn_baker next_protocol_hash ~baker_args:state.baker_args @@ -168,6 +183,19 @@ let monitor_voting_periods ~state head_stream = let*! () = Agnostic_baker_events.(emit period_status) (period_kind, remaining) in + (* Check if old baker from previous protocol exists, and in that case, if + it must be killed. *) + let* () = + match state.old_baker with + | None -> return_unit + | Some (old_baker, level_to_kill) -> + let* head_level = Rpc_services.get_level ~node_addr in + if level_to_kill <= head_level then ( + let*! () = shutdown old_baker in + state.old_baker <- None ; + return_unit) + else return_unit + in let* next_protocol_hash = Rpc_services.get_next_protocol_hash ~node_addr in @@ -178,7 +206,13 @@ let monitor_voting_periods ~state head_stream = in let* () = if not (Protocol_hash.equal current_protocol_hash next_protocol_hash) - then hot_swap_baker ~state ~next_protocol_hash + then + let* head_level = Rpc_services.get_level ~node_addr in + hot_swap_baker + ~state + ~current_protocol_hash + ~next_protocol_hash + ~level_to_kill_old_baker:(head_level + extra_levels_for_old_baker) else return_unit in loop () @@ -253,7 +287,7 @@ let may_start_initial_baker state = may_start ~head_stream:None () let create ~node_endpoint ~baker_args = - {node_endpoint; baker_args; current_baker = None} + {node_endpoint; baker_args; current_baker = None; old_baker = None} let run state = let open Lwt_result_syntax in diff --git a/src/lib_agnostic_baker/rpc_services.ml b/src/lib_agnostic_baker/rpc_services.ml index 2f7c83582c29..bde5622bb778 100644 --- a/src/lib_agnostic_baker/rpc_services.ml +++ b/src/lib_agnostic_baker/rpc_services.ml @@ -43,6 +43,27 @@ let call_and_wrap_rpc ~node_addr ~uri ~f = in raise Not_found +let get_level ~node_addr = + let open Lwt_result_syntax in + let f json = + (* Level field in the RPC result *) + let name = "level" in + let* v = + match json with + | `O fields -> ( + match List.assoc_opt ~equal:( = ) name fields with + | None -> tzfail (Cannot_decode_node_data ("missing field " ^ name)) + | Some node -> return node) + | _ -> tzfail (Cannot_decode_node_data "not an object") + in + let level = Ezjsonm.get_int v in + return level + in + let uri = + Format.sprintf "%s/chains/main/blocks/head/header/shell" node_addr + in + call_and_wrap_rpc ~node_addr ~uri ~f + let get_next_protocol_hash ~node_addr = let open Lwt_result_syntax in let f json = diff --git a/src/lib_agnostic_baker/rpc_services.mli b/src/lib_agnostic_baker/rpc_services.mli index dd3de07a0818..063adf0e11f0 100644 --- a/src/lib_agnostic_baker/rpc_services.mli +++ b/src/lib_agnostic_baker/rpc_services.mli @@ -13,6 +13,9 @@ val request_uri : uri:string -> (Cohttp_lwt_unix.Response.t * Cohttp_lwt.Body.t) tzresult Lwt.t +(** [get_level ~node_addr] returns the level of the block. *) +val get_level : node_addr:string -> (int, error trace) result Lwt.t + (** [get_next_protocol_hash ~node_addr] returns the protocol hash contained in the [next_protocol] field of the metadata of a block. *) -- GitLab From 5be3d88d325908dcd7d46e5c39192efbae171489 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 18 Mar 2025 12:01:40 +0000 Subject: [PATCH 3/8] Agnostic_baker: Refactorings and improved documentation --- src/bin_agnostic_baker/README.md | 7 +++- src/lib_agnostic_baker/daemon.ml | 57 ++++++++++++-------------------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/src/bin_agnostic_baker/README.md b/src/bin_agnostic_baker/README.md index 3dff0205eaa2..16f828c25c30 100644 --- a/src/bin_agnostic_baker/README.md +++ b/src/bin_agnostic_baker/README.md @@ -45,6 +45,10 @@ baker run the baker process for ``, information obtained from the node Notice that the two types of arguments are separated by a clear `--`. +With the introduction of the agnostic baker, a unification of the CLI has also been +achieved, therefore there will not be incompatibilities between two consecutive protocol +baking commands. + ## How it works 1. **Initialization**: The daemon starts and connects to the specified Tezos node. @@ -53,4 +57,5 @@ Notice that the two types of arguments are separated by a clear `--`. 4. **Baker Execution**: The chosen baker process is executed with the specified arguments (`[OCTEZ-BAKER-COMMANDS]`). 5. **Chain Monitoring**: The daemon continuously monitors the chain for new blocks and protocol changes (based on the voting period). -6. **Protocol Updates**: If a new protocol is encountered, the daemon stops the current baker process and starts a new one matching the new protocol. This is currently done via an Lwt cancelling mechanism. +6. **Protocol Updates**: If a new protocol is encountered, the daemon stops the old baker process after a few levels have +passed since the migration (for safety purposes), and starts a new baker process for the new protocol. This is currently done via an Lwt cancelling mechanism. diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index fd4c12bd21a6..0ff1b093374a 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -17,7 +17,7 @@ type process = {thread : int Lwt.t; canceller : int Lwt.u} type baker = {protocol_hash : Protocol_hash.t; process : process} -let shutdown baker = +let shutdown_baker baker = let open Lwt_syntax in let* () = Agnostic_baker_events.(emit stopping_baker) baker.protocol_hash in Lwt.wakeup baker.process.canceller 0 ; @@ -63,8 +63,8 @@ let run_thread ~protocol_hash ~baker_commands ~baker_args ~cancel_promise cancel_promise; ] -(** [spawn_baker protocol_hash ~baker_args] spawns a baker for the given [protocol_hash] - with [~baker_args] as command line arguments. *) +(** [spawn_baker protocol_hash ~baker_args] spawns a new baker process for the + given [protocol_hash] with command-line arguments [~baker_args]. *) let spawn_baker protocol_hash ~baker_args = let open Lwt_result_syntax in let args_as_string = @@ -78,8 +78,8 @@ let spawn_baker protocol_hash ~baker_args = let*! () = Agnostic_baker_events.(emit starting_baker) (protocol_hash, args_as_string) in - (* The mocked binary argument is necessary for command line parsing done in the running - baker function below. It will be discarded, so its value is not important. *) + (* Prepend a dummy binary argument required for command-line parsing. The argument + will be discarded, so its value is not important. *) let baker_args = "./mock-binary" :: baker_args in let cancel_promise, canceller = Lwt.wait () in let* thread = @@ -133,12 +133,8 @@ let monitor_heads ~node_addr = return stream (** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash - ~level_to_kill_old_baker] performs a swap in the current [~state] of the - agnostic baker, exchanging the current baker from [~current_protocol_hash] - with the one corresponding to [~next_protocol_hash]. The current baker is kept - running for a few more levels after the migration (as dictated by - [~level_to_kill_old_baker]), for safety purposes (such as reorganisations after - the migration, or high round blocks). *) + ~level_to_kill_old_baker] moves the current baker into the old baker slot + (to be killed later) and spawns a new baker for [~next_protocol_hash] *) let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash ~level_to_kill_old_baker = let open Lwt_result_syntax in @@ -152,8 +148,6 @@ let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash Agnostic_baker_events.(emit protocol_encountered) (next_proto_status, next_protocol_hash) in - (* Current baker is moved to old baker, which will be killed after a safe - number of levels. *) let*! () = Agnostic_baker_events.(emit become_old_baker) (current_protocol_hash, level_to_kill_old_baker) @@ -166,16 +160,17 @@ let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash state.current_baker <- Some new_baker ; return_unit -(** [monitor_voting_periods ~state head_stream] creates a process which listens - to the [head_stream] stream (which returns the data of the heads of the network - chain) in order to know when to "hot swap" (fork) the current protocol baking - binary with the one associated with the next protocol. *) +(** [monitor_voting_periods ~state head_stream] continuously monitors [heads_stream] + to detect protocol changes. It will: + - Shut down an old baker it its time has come; + - Spawn and "hot-swap" to a new baker if the next protocol hash is different. *) let monitor_voting_periods ~state head_stream = let open Lwt_result_syntax in let node_addr = state.node_endpoint in let rec loop () = let*! v = Lwt_stream.get head_stream in match v with + | None -> tzfail Lost_node_connection | Some _tick -> let* period_kind, remaining = Rpc_services.get_current_period ~node_addr @@ -183,15 +178,14 @@ let monitor_voting_periods ~state head_stream = let*! () = Agnostic_baker_events.(emit period_status) (period_kind, remaining) in - (* Check if old baker from previous protocol exists, and in that case, if - it must be killed. *) + (* Kill old baker if its safe period has elapsed. *) let* () = match state.old_baker with | None -> return_unit - | Some (old_baker, level_to_kill) -> + | Some (old_baker, kill_level) -> let* head_level = Rpc_services.get_level ~node_addr in - if level_to_kill <= head_level then ( - let*! () = shutdown old_baker in + if head_level >= kill_level then ( + let*! () = shutdown_baker old_baker in state.old_baker <- None ; return_unit) else return_unit @@ -202,7 +196,7 @@ let monitor_voting_periods ~state head_stream = let* current_protocol_hash = match state.current_baker with | None -> tzfail Missing_current_baker - | Some v -> return v.protocol_hash + | Some baker -> return baker.protocol_hash in let* () = if not (Protocol_hash.equal current_protocol_hash next_protocol_hash) @@ -216,10 +210,8 @@ let monitor_voting_periods ~state head_stream = else return_unit in loop () - | None -> tzfail Lost_node_connection in - let* () = loop () in - return_unit + loop () (** [baker_thread ~state] monitors the current baker thread for any potential error, and it propagates any error that can appear. *) @@ -232,11 +224,9 @@ let baker_thread ~state = in if retcode = 0 then return_unit else tzfail Baker_process_error -(** [may_start_initial_baker state] aims to start the baker associated - to the current protocol. If the protocol is considered as [frozen] (not - [active] anymore), and there is thus no actual baker binary anymore, the - initial phase consists in waiting until an [active] protocol is observed on - monitored heads function. *) +(** [may_start_initial_baker state] recursively waits for an [active] protocol + and spawns a baker for it. If the protocol is [frozen] (not [active] anymore), it + waits for a head with an [active] protocol. *) let may_start_initial_baker state = let open Lwt_result_syntax in let*! () = Agnostic_baker_events.(emit experimental_binary) () in @@ -302,7 +292,4 @@ let run state = let* head_stream = monitor_heads ~node_addr in (* Monitoring voting periods through heads monitoring to avoid missing UAUs. *) - let* () = - Lwt.pick [monitor_voting_periods ~state head_stream; baker_thread ~state] - in - return_unit + Lwt.pick [monitor_voting_periods ~state head_stream; baker_thread ~state] -- GitLab From d54389a44cc9b492891ef0931e3ee624b5901c2a Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Thu, 20 Mar 2025 09:27:11 +0000 Subject: [PATCH 4/8] Tezt: Agnostic_baker: Refactor tezt --- tezt/tests/agnostic_baker_test.ml | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tezt/tests/agnostic_baker_test.ml b/tezt/tests/agnostic_baker_test.ml index 60fb3d5f9739..3222634ee730 100644 --- a/tezt/tests/agnostic_baker_test.ml +++ b/tezt/tests/agnostic_baker_test.ml @@ -29,6 +29,18 @@ let wait_for_active_protocol_waiting agnostic_baker = "waiting_for_active_protocol.v0" (fun _json -> Some ()) +let remote_sign client = + let* () = Client.forget_all_keys client in + let keys = Constant.activator :: (Account.Bootstrap.keys |> Array.to_list) in + let* signer = Signer.init ~keys () in + (* tell the baker to ask the signer for the bootstrap keys *) + let uri = Signer.uri signer in + Lwt_list.iter_s + (fun account -> + let Account.{alias; public_key_hash; _} = account in + Client.import_signer_key client ~alias ~public_key_hash ~signer:uri) + keys + (* Performs a protocol migration thanks to a UAU and the agnostic baker. *) let perform_protocol_migration ?node_name ?client_name ?parameter_file ?(use_remote_signer = false) ~blocks_per_cycle ~migration_level @@ -43,22 +55,7 @@ let perform_protocol_migration ?node_name ?client_name ?parameter_file ~migrate_to () in - let* () = - if use_remote_signer then - let* () = Client.forget_all_keys client in - let keys = - Constant.activator :: (Account.Bootstrap.keys |> Array.to_list) - in - let* signer = Signer.init ~keys () in - (* tell the baker to ask the signer for the bootstrap keys *) - let uri = Signer.uri signer in - Lwt_list.iter_s - (fun account -> - let Account.{alias; public_key_hash; _} = account in - Client.import_signer_key client ~alias ~public_key_hash ~signer:uri) - keys - else unit - in + let* () = if use_remote_signer then remote_sign client else unit in Log.info "Node %s initialized" (Node.name node) ; let baker = Agnostic_baker.create node client in let wait_for_active_protocol_waiting = -- GitLab From 24b8f92f4446c56d5bdd224c4d8e508449a9824d Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Thu, 20 Mar 2025 09:58:29 +0000 Subject: [PATCH 5/8] Tezt: Agnostic_baker: Check that old baker is killed after same number of levels --- tezt/lib_tezos/agnostic_baker.ml | 4 ++++ tezt/lib_tezos/agnostic_baker.mli | 5 +++++ tezt/tests/agnostic_baker_test.ml | 24 ++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/tezt/lib_tezos/agnostic_baker.ml b/tezt/lib_tezos/agnostic_baker.ml index e1f280dc5da9..7255ce07c082 100644 --- a/tezt/lib_tezos/agnostic_baker.ml +++ b/tezt/lib_tezos/agnostic_baker.ml @@ -22,6 +22,10 @@ type protocol_status = Active | Frozen | Ignore let protocol_status = function Protocol.Alpha -> Ignore | _ -> Active +(* This is hard-coded after the same value from [Daemon] module from + [src/lib_agnostic_baker]. *) +let extra_levels_for_old_baker = 3 + module Parameters = struct type persistent_state = { delegates : string list; diff --git a/tezt/lib_tezos/agnostic_baker.mli b/tezt/lib_tezos/agnostic_baker.mli index 4109b16f324c..62c00048f060 100644 --- a/tezt/lib_tezos/agnostic_baker.mli +++ b/tezt/lib_tezos/agnostic_baker.mli @@ -80,6 +80,11 @@ type protocol_status = Active | Frozen | Ignore (** Returns the protocol status given the full protocol value. *) val protocol_status : Protocol.t -> protocol_status +(** Number of extra levels to keep the old baker alive before shutting it down. + This extra time is used to avoid halting the chain in cases such as + reorganization or high round migration blocks. *) +val extra_levels_for_old_baker : int + (** Create a agnostic baker. This function just creates a value of type [t], it does not call {!val:run}. diff --git a/tezt/tests/agnostic_baker_test.ml b/tezt/tests/agnostic_baker_test.ml index 3222634ee730..b026e779a97d 100644 --- a/tezt/tests/agnostic_baker_test.ml +++ b/tezt/tests/agnostic_baker_test.ml @@ -29,6 +29,14 @@ let wait_for_active_protocol_waiting agnostic_baker = "waiting_for_active_protocol.v0" (fun _json -> Some ()) +let wait_for_become_old_baker agnostic_baker = + Agnostic_baker.wait_for agnostic_baker "become_old_baker.v0" (fun _json -> + Some ()) + +let wait_for_stopping_baker agnostic_baker = + Agnostic_baker.wait_for agnostic_baker "stopping_baker.v0" (fun _json -> + Some ()) + let remote_sign client = let* () = Client.forget_all_keys client in let keys = Constant.activator :: (Account.Bootstrap.keys |> Array.to_list) in @@ -75,6 +83,11 @@ let perform_protocol_migration ?node_name ?client_name ?parameter_file Log.info "Protocol %s activated" (Protocol.hash migrate_from) ; Log.info "Baking at least %d blocks to trigger migration" migration_level ; let* _level = Node.wait_for_level node migration_level in + let wait_for_become_old_baker = wait_for_become_old_baker baker in + Log.info + "Check that the baking process for %s is not killed" + (Protocol.tag migrate_from) ; + let* () = wait_for_become_old_baker in (* Ensure that the block before migration is consistent *) Log.info "Checking migration block consistency" ; let* () = @@ -96,6 +109,17 @@ let perform_protocol_migration ?node_name ?client_name ?parameter_file ~migrate_to ~level:(migration_level + 1) in + Log.info + "Check that baker for protocol %s is killed after %d levels" + (Protocol.tag migrate_from) + Agnostic_baker.extra_levels_for_old_baker ; + let wait_for_stopping_baker = wait_for_stopping_baker baker in + let* _level = + Node.wait_for_level + node + (migration_level + Agnostic_baker.extra_levels_for_old_baker) + in + let* () = wait_for_stopping_baker in (* Test that we can still bake after migration *) let* _level = Node.wait_for_level node baked_blocks_after_migration in let* () = Agnostic_baker.terminate baker in -- GitLab From 882cb0639e8fb947dd7a9cf66c2ff93c34e3fa04 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Thu, 20 Mar 2025 10:04:30 +0000 Subject: [PATCH 6/8] Agnostic_baker: Refactor daemon shutdown function for old baker --- src/lib_agnostic_baker/daemon.ml | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 0ff1b093374a..134efc76d606 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -17,12 +17,6 @@ type process = {thread : int Lwt.t; canceller : int Lwt.u} type baker = {protocol_hash : Protocol_hash.t; process : process} -let shutdown_baker baker = - let open Lwt_syntax in - let* () = Agnostic_baker_events.(emit stopping_baker) baker.protocol_hash in - Lwt.wakeup baker.process.canceller 0 ; - return_unit - (** [run_thread ~protocol_hash ~baker_commands ~baker_args ~cancel_promise ~logs_path] returns the main running thread for the baker given its protocol [~procol_hash], corresponding commands [~baker_commands], with the command line arguments given by @@ -160,6 +154,23 @@ let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash state.current_baker <- Some new_baker ; return_unit +(** [maybe_kill_old_baker state head_level] checks whether the [old_baker] process + from the [state] of the agnostic baker has surpassed its lifetime and it stops + it if that is the case. *) +let maybe_kill_old_baker state head_level = + let open Lwt_syntax in + match state.old_baker with + | None -> return_unit + | Some (old_baker, kill_level) -> + if head_level >= kill_level then ( + let* () = + Agnostic_baker_events.(emit stopping_baker) old_baker.protocol_hash + in + Lwt.wakeup old_baker.process.canceller 0 ; + state.old_baker <- None ; + return_unit) + else return_unit + (** [monitor_voting_periods ~state head_stream] continuously monitors [heads_stream] to detect protocol changes. It will: - Shut down an old baker it its time has come; @@ -178,18 +189,8 @@ let monitor_voting_periods ~state head_stream = let*! () = Agnostic_baker_events.(emit period_status) (period_kind, remaining) in - (* Kill old baker if its safe period has elapsed. *) - let* () = - match state.old_baker with - | None -> return_unit - | Some (old_baker, kill_level) -> - let* head_level = Rpc_services.get_level ~node_addr in - if head_level >= kill_level then ( - let*! () = shutdown_baker old_baker in - state.old_baker <- None ; - return_unit) - else return_unit - in + let* head_level = Rpc_services.get_level ~node_addr in + let*! () = maybe_kill_old_baker state head_level in let* next_protocol_hash = Rpc_services.get_next_protocol_hash ~node_addr in @@ -201,7 +202,6 @@ let monitor_voting_periods ~state head_stream = let* () = if not (Protocol_hash.equal current_protocol_hash next_protocol_hash) then - let* head_level = Rpc_services.get_level ~node_addr in hot_swap_baker ~state ~current_protocol_hash -- GitLab From 225215e4c7d4c3ef8d3f438f90f776d6a00b0667 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Fri, 21 Mar 2025 10:26:00 +0000 Subject: [PATCH 7/8] Agnostic_baker: Refactor old_baker into specific type --- src/lib_agnostic_baker/daemon.ml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 134efc76d606..e689f6f0d001 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -90,11 +90,13 @@ let spawn_baker protocol_hash ~baker_args = let*! () = Agnostic_baker_events.(emit baker_running) protocol_hash in return {protocol_hash; process = {thread; canceller}} +type baker_to_kill = {baker : baker; level_to_kill : int} + type 'a state = { node_endpoint : string; baker_args : string list; mutable current_baker : baker option; - mutable old_baker : (baker * int) option; + mutable old_baker : baker_to_kill option; } type 'a t = 'a state @@ -146,7 +148,8 @@ let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash Agnostic_baker_events.(emit become_old_baker) (current_protocol_hash, level_to_kill_old_baker) in - state.old_baker <- Some (current_baker, level_to_kill_old_baker) ; + state.old_baker <- + Some {baker = current_baker; level_to_kill = level_to_kill_old_baker} ; state.current_baker <- None ; let* new_baker = spawn_baker next_protocol_hash ~baker_args:state.baker_args @@ -161,12 +164,12 @@ let maybe_kill_old_baker state head_level = let open Lwt_syntax in match state.old_baker with | None -> return_unit - | Some (old_baker, kill_level) -> - if head_level >= kill_level then ( + | Some {baker; level_to_kill} -> + if head_level >= level_to_kill then ( let* () = - Agnostic_baker_events.(emit stopping_baker) old_baker.protocol_hash + Agnostic_baker_events.(emit stopping_baker) baker.protocol_hash in - Lwt.wakeup old_baker.process.canceller 0 ; + Lwt.wakeup baker.process.canceller 0 ; state.old_baker <- None ; return_unit) else return_unit -- GitLab From 56880fe0e09fc53d8c939cf228bc096fbc8dd727 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Fri, 21 Mar 2025 12:06:10 +0000 Subject: [PATCH 8/8] Agnostic_baker: Add parsing level function for chain head --- src/lib_agnostic_baker/daemon.ml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index e689f6f0d001..f396b39727e8 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -157,14 +157,21 @@ let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash state.current_baker <- Some new_baker ; return_unit -(** [maybe_kill_old_baker state head_level] checks whether the [old_baker] process +(** [parse_level head_info] retrieves the ["level"] field information from the + json information of the chain from [head_info]. *) +let parse_level head_info = + let json = Ezjsonm.from_string head_info in + Ezjsonm.find json ["level"] |> Ezjsonm.get_int + +(** [maybe_kill_old_baker state head_info] checks whether the [old_baker] process from the [state] of the agnostic baker has surpassed its lifetime and it stops it if that is the case. *) -let maybe_kill_old_baker state head_level = +let maybe_kill_old_baker state head_info = let open Lwt_syntax in match state.old_baker with | None -> return_unit | Some {baker; level_to_kill} -> + let head_level = parse_level head_info in if head_level >= level_to_kill then ( let* () = Agnostic_baker_events.(emit stopping_baker) baker.protocol_hash @@ -182,18 +189,17 @@ let monitor_voting_periods ~state head_stream = let open Lwt_result_syntax in let node_addr = state.node_endpoint in let rec loop () = - let*! v = Lwt_stream.get head_stream in - match v with + let*! head_info_opt = Lwt_stream.get head_stream in + match head_info_opt with | None -> tzfail Lost_node_connection - | Some _tick -> + | Some head_info -> let* period_kind, remaining = Rpc_services.get_current_period ~node_addr in let*! () = Agnostic_baker_events.(emit period_status) (period_kind, remaining) in - let* head_level = Rpc_services.get_level ~node_addr in - let*! () = maybe_kill_old_baker state head_level in + let*! () = maybe_kill_old_baker state head_info in let* next_protocol_hash = Rpc_services.get_next_protocol_hash ~node_addr in @@ -209,7 +215,8 @@ let monitor_voting_periods ~state head_stream = ~state ~current_protocol_hash ~next_protocol_hash - ~level_to_kill_old_baker:(head_level + extra_levels_for_old_baker) + ~level_to_kill_old_baker: + (parse_level head_info + extra_levels_for_old_baker) else return_unit in loop () -- GitLab