From 5af8cfdc2ba72aa01ba4e821ba2652d04154d34b Mon Sep 17 00:00:00 2001 From: Emma Turner Date: Tue, 6 Dec 2022 13:37:09 +0000 Subject: [PATCH 1/4] Scoru/Wasm VM: fix scheduling of compute_step_many on eval finished When evaluation finishes, the `stuck_flag` is deleted if it exists. This bookkeeping was not happening under `eval_step_many_until`, as it treated the final eval tick as a padding tick. --- src/lib_scoru_wasm/wasm_vm.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib_scoru_wasm/wasm_vm.ml b/src/lib_scoru_wasm/wasm_vm.ml index b173d04c89ee..7b82d0c6de99 100644 --- a/src/lib_scoru_wasm/wasm_vm.ml +++ b/src/lib_scoru_wasm/wasm_vm.ml @@ -396,7 +396,9 @@ let input_request pvm_state = | _ -> Wasm_pvm_state.No_input_required let is_top_level_padding pvm_state = - eval_has_finished pvm_state.tick_state && not (is_time_for_snapshot pvm_state) + match pvm_state.tick_state with + | Padding -> not @@ is_time_for_snapshot pvm_state + | _ -> false let measure_executed_ticks (transition : pvm_state -> pvm_state Lwt.t) (initial_state : pvm_state) : (pvm_state * int64) Lwt.t = -- GitLab From 500fc2b50d88331d29a4af9fd58415464707d4d9 Mon Sep 17 00:00:00 2001 From: Emma Turner Date: Tue, 6 Dec 2022 13:50:36 +0000 Subject: [PATCH 2/4] Scoru/Wasm VM: continue computation when errors have fallback Previously, evaluation stopped when a Stuck state was reached. If the error was recoverable through the fallback mechanism, then the following evaluation would 'unstuck' the vm. This worked fine for the PVM, but resulted in the node halting execution when such an error was reached. Instead, we continue computation when an error is recoverable. --- src/lib_scoru_wasm/test/helpers/wasm_utils.ml | 12 +++ src/lib_scoru_wasm/test/test_init.ml | 24 +++-- src/lib_scoru_wasm/test/test_wasm_pvm.ml | 92 +++++++++---------- src/lib_scoru_wasm/wasm_vm.ml | 4 + 4 files changed, 75 insertions(+), 57 deletions(-) diff --git a/src/lib_scoru_wasm/test/helpers/wasm_utils.ml b/src/lib_scoru_wasm/test/helpers/wasm_utils.ml index 0cf85bb3adf3..2c2ff782dd7a 100644 --- a/src/lib_scoru_wasm/test/helpers/wasm_utils.ml +++ b/src/lib_scoru_wasm/test/helpers/wasm_utils.ml @@ -237,8 +237,20 @@ let check_error ?expected_kind ?expected_reason error = match (expected_kind, error) with | Some `Decode, Wasm_pvm_errors.Decode_error {explanation; _} -> check_reason explanation + | ( Some `No_fallback_decode, + Wasm_pvm_errors.No_fallback_kernel + (Wasm_pvm_errors.Decode_cause {explanation; _}) ) -> + check_reason explanation | Some `Init, Init_error {explanation; _} -> check_reason explanation + | ( Some `No_fallback_init, + Wasm_pvm_errors.No_fallback_kernel + (Wasm_pvm_errors.Init_cause {explanation; _}) ) -> + check_reason explanation | Some `Link, Link_error explanation -> check_reason (Some explanation) + | ( Some `No_fallback_link, + Wasm_pvm_errors.No_fallback_kernel + (Wasm_pvm_errors.Link_cause explanation) ) -> + check_reason (Some explanation) | Some `Eval, Eval_error {explanation; _} -> check_reason explanation | Some `Invalid_state, Invalid_state explanation -> check_reason (Some explanation) diff --git a/src/lib_scoru_wasm/test/test_init.ml b/src/lib_scoru_wasm/test/test_init.ml index 465decf66326..cf3c4593be42 100644 --- a/src/lib_scoru_wasm/test/test_init.ml +++ b/src/lib_scoru_wasm/test/test_init.ml @@ -38,7 +38,7 @@ let test_memory0_export () = let* stuck, _ = eval_until_stuck bad_module_tree in assert ( check_error - ~expected_kind:`Init + ~expected_kind:`No_fallback_init ~expected_reason:"Module must export memory 0" stuck) ; @@ -92,7 +92,7 @@ let test_module_name_size () = let* stuck, _ = eval_until_stuck bad_module_tree in assert ( check_error - ~expected_kind:`Decode + ~expected_kind:`No_fallback_decode ~expected_reason:"Names cannot exceed 512 bytes" stuck) ; let*! good_module_tree = initial_tree (build_module 512) in @@ -136,6 +136,12 @@ let test_imports () = ~module_name:bad_module_name ~item_name:bad_item_name in + let expected_error = + match expected_error with + | Wasm_pvm_errors.(Link_error e) -> + Wasm_pvm_errors.(No_fallback_kernel (Link_cause e)) + | _ -> assert false + in if stuck = expected_error then return_unit else failwith "Unexpected stuck state!" in @@ -153,6 +159,12 @@ let test_imports () = ~module_name:good_module_name ~item_name:bad_item_name in + let expected_error = + match expected_error with + | Wasm_pvm_errors.(Link_error e) -> + Wasm_pvm_errors.(No_fallback_kernel (Link_cause e)) + | _ -> assert false + in if stuck = expected_error then return_unit else failwith "Unexpected stuck state!" in @@ -192,7 +204,7 @@ let test_host_func_start_restriction () = let+ stuck, _ = eval_until_stuck state in assert ( check_error - ~expected_kind:`Init + ~expected_kind:`No_fallback_init ~expected_reason: "host functions must not access memory during initialisation" stuck) @@ -274,7 +286,7 @@ let test_float32_type () = Format.printf "%a\n%!" pp_state (Stuck stuck) ; assert ( check_error - ~expected_kind:`Decode + ~expected_kind:`No_fallback_decode ~expected_reason:"float instructions are forbidden" stuck) @@ -299,7 +311,7 @@ let test_float64_type () = Format.printf "%a\n%!" pp_state (Stuck stuck) ; assert ( check_error - ~expected_kind:`Decode + ~expected_kind:`No_fallback_decode ~expected_reason:"float instructions are forbidden" stuck) @@ -325,7 +337,7 @@ let test_float_value () = Format.printf "%a\n%!" pp_state (Stuck stuck) ; assert ( check_error - ~expected_kind:`Decode + ~expected_kind:`No_fallback_decode ~expected_reason:"float instructions are forbidden" stuck) diff --git a/src/lib_scoru_wasm/test/test_wasm_pvm.ml b/src/lib_scoru_wasm/test/test_wasm_pvm.ml index 3d76e0ed8bc0..0093a586bc1d 100644 --- a/src/lib_scoru_wasm/test/test_wasm_pvm.ml +++ b/src/lib_scoru_wasm/test/test_wasm_pvm.ml @@ -794,7 +794,7 @@ let test_unkown_host_function_truncated () = (* The final reason for being stuck should be truncated after [Wasm_pvm_errors.messages_maximum_size]. *) let reason = String.sub reason 0 Wasm_pvm_errors.messages_maximum_size in - assert (is_stuck ~step:`Link ~reason state) ; + assert (is_stuck ~step:`No_fallback_link ~reason state) ; return_unit let test_bulk_noops () = @@ -1073,8 +1073,8 @@ let test_reveal_upgrade_kernel_ok () = let*! () = assert_fallback_kernel tree @@ Some preimage in return_unit -let test_reveal_upgrade_kernel_fallsback_on_error ~binary ~error invalid_kernel - () = +let test_reveal_upgrade_kernel_fallsback_on_error ~binary ~error:_ + invalid_kernel () = let open Lwt_result_syntax in let*! modul = wat2wasm @@ reveal_upgrade_kernel () in (* Let's first init the tree to compute. *) @@ -1087,63 +1087,53 @@ let test_reveal_upgrade_kernel_fallsback_on_error ~binary ~error invalid_kernel (* At this point, the kernel should be installed, but not as fallback *) let*! () = assert_kernel tree @@ Some modul in let*! () = assert_fallback_kernel tree None in - (* Run the kernel, it should request a preimage reveal *) - let*! tree = set_empty_inbox_step 0l tree in - let*! tree = eval_until_input_requested tree in - (* At this point, the kernel should be installed, including as fallback *) - let*! () = assert_kernel tree @@ Some modul in - let*! () = assert_fallback_kernel tree @@ Some modul in - (* Check that reveal is requested *) - let*! state_after_first_message = - Wasm.Internal_for_tests.get_tick_state tree - in - assert (not @@ is_stuck state_after_first_message) ; - let*! info = Wasm.get_info tree in - let* () = - let open Wasm_pvm_state in - match info.Wasm_pvm_state.input_request with - | Wasm_pvm_state.Reveal_required (Reveal_raw_data hash) -> - (* The PVM has reached a point where it’s asking for some - preimage. Since the memory is left blank, we are looking - for the zero hash *) - let zero_hash = String.make 33 '\000' in - assert (hash = zero_hash) ; - return_unit - | No_input_required | Input_required | Reveal_required _ -> assert false - in let*! preimage = if binary then Lwt.return invalid_kernel else wat2wasm invalid_kernel in - let*! tree = Wasm.reveal_step (Bytes.of_string preimage) tree in - let*! tree = eval_until_input_requested tree in - let*! state_after_reveal = Wasm.Internal_for_tests.get_tick_state tree in - assert (not @@ is_stuck state_after_reveal) ; - (* At this point, the new_kernel should be installed, the old as fallback *) - let*! () = assert_kernel tree @@ Some preimage in - let*! () = assert_fallback_kernel tree @@ Some modul in - (* run until proper input requested *) - let*! tree = eval_until_input_requested tree in - let*! state_after_reveal = Wasm.Internal_for_tests.get_tick_state tree in - assert (not @@ is_stuck state_after_reveal) ; - (* At this point, the new_kernel should be installed, including as fallback *) - let*! () = assert_kernel tree @@ Some preimage in - let*! () = assert_fallback_kernel tree @@ Some modul in + (* Run the kernel, it should request a preimage reveal *) + let run_installer tree level = + let*! tree = set_empty_inbox_step level tree in + let*! tree = eval_until_input_requested tree in + (* At this point, the kernel should be installed, including as fallback *) + let*! () = assert_kernel tree @@ Some modul in + let*! () = assert_fallback_kernel tree @@ Some modul in + (* Check that reveal is requested *) + let*! state_after_first_message = + Wasm.Internal_for_tests.get_tick_state tree + in + assert (not @@ is_stuck state_after_first_message) ; + let*! info = Wasm.get_info tree in + let* () = + let open Wasm_pvm_state in + match info.Wasm_pvm_state.input_request with + | Wasm_pvm_state.Reveal_required (Reveal_raw_data hash) -> + (* The PVM has reached a point where it’s asking for some + preimage. Since the memory is left blank, we are looking + for the zero hash *) + let zero_hash = String.make 33 '\000' in + assert (hash = zero_hash) ; + return_unit + | No_input_required | Input_required | Reveal_required _ -> assert false + in + let*! tree = Wasm.reveal_step (Bytes.of_string preimage) tree in + let*! state_after_reveal = Wasm.Internal_for_tests.get_tick_state tree in + assert (not @@ is_stuck state_after_reveal) ; + let*! tree = eval_until_input_requested tree in + let*! state_after_reveal = Wasm.Internal_for_tests.get_tick_state tree in + assert (not @@ is_stuck state_after_reveal) ; + (* At this point, the new_kernel should be installed, including as fallback *) + let*! () = assert_kernel tree @@ Some preimage in + let*! () = assert_fallback_kernel tree @@ Some modul in + Lwt.return_ok tree + in + let* tree = run_installer tree 0l in (* run with new input, this should be the new kernel *) let*! tree = set_empty_inbox_step 2l tree in let*! tree = eval_until_input_requested tree in let*! state_after_second_message = Wasm.Internal_for_tests.get_tick_state tree in - (* The pvm should be temporarily stuck *) - assert (is_stuck ~step:error state_after_second_message) ; - (* The kernel is set to the invalid one *) - let*! () = assert_kernel tree @@ Some preimage in - let*! () = assert_fallback_kernel tree @@ Some modul in - (* Running the kernel one more tick should result in fallback *) - let*! tree = Wasm.compute_step tree in - let*! post_stuck_state = Wasm.Internal_for_tests.get_tick_state tree in - assert (not @@ is_stuck post_stuck_state) ; - (* The kernel is set to the invalid one *) + assert (not @@ is_stuck state_after_second_message) ; let*! () = assert_kernel tree @@ Some modul in let*! () = assert_fallback_kernel tree @@ Some modul in return_unit diff --git a/src/lib_scoru_wasm/wasm_vm.ml b/src/lib_scoru_wasm/wasm_vm.ml index 7b82d0c6de99..6a66dace6db6 100644 --- a/src/lib_scoru_wasm/wasm_vm.ml +++ b/src/lib_scoru_wasm/wasm_vm.ml @@ -386,6 +386,10 @@ let compute_step pvm_state = let input_request pvm_state = match pvm_state.tick_state with + | Stuck (Decode_error _ | Init_error _ | Link_error _) -> + (* These stuck states are recovered on the next tick by + the fallback mechanism. *) + Wasm_pvm_state.No_input_required | Stuck _ -> Wasm_pvm_state.Input_required | Snapshot -> Wasm_pvm_state.No_input_required | Collect -> Wasm_pvm_state.Input_required -- GitLab From ea90674716034e58dbb8d2b000256332265ab553 Mon Sep 17 00:00:00 2001 From: Emma Turner Date: Tue, 6 Dec 2022 13:56:23 +0000 Subject: [PATCH 3/4] Scoru/Wasm: write upgrade error flag to durable on fallback When an upgrade fails, write the failure to durable storage. This allows the original (fallback) kernel to more gracefully handle the error, rather than blindly trying again as it may do otherwise. --- src/lib_scoru_wasm/constants.ml | 3 +++ src/lib_scoru_wasm/test/test_wasm_pvm.ml | 22 +++++++++++++---- src/lib_scoru_wasm/wasm_vm.ml | 30 ++++++++++++++++++++---- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/lib_scoru_wasm/constants.ml b/src/lib_scoru_wasm/constants.ml index a4c28f9f9f29..5e5a3d356e2a 100644 --- a/src/lib_scoru_wasm/constants.ml +++ b/src/lib_scoru_wasm/constants.ml @@ -54,6 +54,9 @@ let kernel_fallback_key = Durable.key_of_string_exn "/readonly/kernel/boot.wasm" let stuck_flag_key = Durable.key_of_string_exn "/readonly/kernel/env/stuck" +let upgrade_error_flag_key = + Durable.key_of_string_exn "/readonly/kernel/env/upgrade_error" + let too_many_reboot_flag_key = Durable.key_of_string_exn "/readonly/kernel/env/too_many_reboot" diff --git a/src/lib_scoru_wasm/test/test_wasm_pvm.ml b/src/lib_scoru_wasm/test/test_wasm_pvm.ml index 0093a586bc1d..d041564a45e5 100644 --- a/src/lib_scoru_wasm/test/test_wasm_pvm.ml +++ b/src/lib_scoru_wasm/test/test_wasm_pvm.ml @@ -1073,8 +1073,7 @@ let test_reveal_upgrade_kernel_ok () = let*! () = assert_fallback_kernel tree @@ Some preimage in return_unit -let test_reveal_upgrade_kernel_fallsback_on_error ~binary ~error:_ - invalid_kernel () = +let test_reveal_upgrade_kernel_fallsback_on_error ~binary invalid_kernel () = let open Lwt_result_syntax in let*! modul = wat2wasm @@ reveal_upgrade_kernel () in (* Let's first init the tree to compute. *) @@ -1133,9 +1132,25 @@ let test_reveal_upgrade_kernel_fallsback_on_error ~binary ~error:_ let*! state_after_second_message = Wasm.Internal_for_tests.get_tick_state tree in + let has_upgrade_error_flag tree = + let*! durable = wrap_as_durable_storage tree in + let durable = Durable.of_storage_exn durable in + let*! found_error = + Durable.find_value durable Constants.upgrade_error_flag_key + in + Lwt.return_ok @@ Option.is_some found_error + in + (* The pvm should have an upgrade error, but not be stuck *) assert (not @@ is_stuck state_after_second_message) ; + let* has_upgrade_error = has_upgrade_error_flag tree in + assert has_upgrade_error ; + (* The kernel is set to the fallback one *) let*! () = assert_kernel tree @@ Some modul in let*! () = assert_fallback_kernel tree @@ Some modul in + (* The next evaluation should delete the upgrade error. *) + let* tree = run_installer tree 3l in + let* has_upgrade_error = has_upgrade_error_flag tree in + assert (not has_upgrade_error) ; return_unit let test_kernel_reboot_gen ~reboots ~expected_reboots ~pvm_max_reboots = @@ -1627,14 +1642,12 @@ let tests = `Quick (test_reveal_upgrade_kernel_fallsback_on_error ~binary:true - ~error:`Decode "INVALID WASM!!!"); tztest "Test kernel upgrade fallsback on linking error" `Quick (test_reveal_upgrade_kernel_fallsback_on_error ~binary:false - ~error:`Link {| (module (import "invalid_module" "write_debug" @@ -1645,7 +1658,6 @@ let tests = `Quick (test_reveal_upgrade_kernel_fallsback_on_error ~binary:false - ~error:`Init "(module (memory 1))"); tztest "Test scheduling with 10 inputs in a unique inbox" diff --git a/src/lib_scoru_wasm/wasm_vm.ml b/src/lib_scoru_wasm/wasm_vm.ml index 6a66dace6db6..e515ae6144f1 100644 --- a/src/lib_scoru_wasm/wasm_vm.ml +++ b/src/lib_scoru_wasm/wasm_vm.ml @@ -58,6 +58,11 @@ let has_stuck_flag durable = let+ stuck = Durable.(find_value durable Constants.stuck_flag_key) in Option.is_some stuck +let has_upgrade_error_flag durable = + let open Lwt_syntax in + let+ error = Durable.(find_value durable Constants.upgrade_error_flag_key) in + Option.is_some error + let mark_for_reboot {reboot_counter; durable; _} = let open Lwt_syntax in let+ has_reboot_flag = has_reboot_flag durable in @@ -114,6 +119,14 @@ let unsafe_next_tick_state ({buffers; durable; tick_state; _} as pvm_state) = Constants.kernel_fallback_key Constants.kernel_key in + let* durable = + Durable.write_value_exn + ~edit_readonly:true + durable + Constants.upgrade_error_flag_key + 0L + "" + in return ~durable Padding else return ~status:Failing (Stuck (No_fallback_kernel cause)) | Stuck e -> return ~status:Failing (Stuck e) @@ -249,10 +262,19 @@ let unsafe_next_tick_state ({buffers; durable; tick_state; _} as pvm_state) = | _ when eval_has_finished tick_state -> (* We have an empty set of admin instructions, but need to wait until we can restart *) let* has_stuck_flag = has_stuck_flag durable in - if has_stuck_flag then - let* durable = Durable.(delete durable Constants.stuck_flag_key) in - return ~durable Padding - else return Padding + let* durable = + if has_stuck_flag then + Durable.(delete ~edit_readonly:true durable Constants.stuck_flag_key) + else Lwt.return durable + in + let* has_upgrade_error_flag = has_upgrade_error_flag durable in + let* durable = + if has_upgrade_error_flag then + Durable.( + delete ~edit_readonly:true durable Constants.upgrade_error_flag_key) + else Lwt.return durable + in + return ~durable Padding | Eval {config; module_reg} -> (* Continue execution. *) let store = Durable.to_storage durable in -- GitLab From 6a71dc4d6bf366d196315f6dfd7f0af6860985ea Mon Sep 17 00:00:00 2001 From: Emma Turner Date: Tue, 6 Dec 2022 15:12:07 +0000 Subject: [PATCH 4/4] Scoru/Wasm VM: patch error flags on successful fast execution run --- src/lib_scoru_wasm/fast/exec.ml | 5 ++++- src/lib_scoru_wasm/wasm_vm.ml | 32 +++++++++++++++++++------------- src/lib_scoru_wasm/wasm_vm.mli | 5 +++++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/lib_scoru_wasm/fast/exec.ml b/src/lib_scoru_wasm/fast/exec.ml index bd1d39f94efa..352d6cf3ea02 100644 --- a/src/lib_scoru_wasm/fast/exec.ml +++ b/src/lib_scoru_wasm/fast/exec.ml @@ -28,6 +28,8 @@ open Tezos_scoru_wasm module Wasmer = Tezos_wasmer module Lazy_containers = Tezos_lazy_containers +include (Wasm_vm : Wasm_vm_sig.S) + let store = Lazy.from_fun @@ fun () -> let engine = Wasmer.Engine.create Wasmer.Config.{compiler = SINGLEPASS} in @@ -73,7 +75,8 @@ let compute builtins durable buffers = Wasmer.Instance.delete instance ; + let* durable = Wasm_vm.patch_flags_on_eval_successful host_state.durable in (* TODO: #4283 The module is cached, but the cash is never cleaned. This is the point where it was scrubed before.*) - Lwt.return host_state.durable + Lwt.return durable diff --git a/src/lib_scoru_wasm/wasm_vm.ml b/src/lib_scoru_wasm/wasm_vm.ml index e515ae6144f1..bb97ee534782 100644 --- a/src/lib_scoru_wasm/wasm_vm.ml +++ b/src/lib_scoru_wasm/wasm_vm.ml @@ -63,6 +63,24 @@ let has_upgrade_error_flag durable = let+ error = Durable.(find_value durable Constants.upgrade_error_flag_key) in Option.is_some error +let patch_flags_on_eval_successful durable = + let open Lwt_syntax in + (* We have an empty set of admin instructions, but need to wait until we can restart *) + let* has_stuck_flag = has_stuck_flag durable in + let* durable = + if has_stuck_flag then + Durable.(delete ~edit_readonly:true durable Constants.stuck_flag_key) + else Lwt.return durable + in + let* has_upgrade_error_flag = has_upgrade_error_flag durable in + let+ durable = + if has_upgrade_error_flag then + Durable.( + delete ~edit_readonly:true durable Constants.upgrade_error_flag_key) + else Lwt.return durable + in + durable + let mark_for_reboot {reboot_counter; durable; _} = let open Lwt_syntax in let+ has_reboot_flag = has_reboot_flag durable in @@ -261,19 +279,7 @@ let unsafe_next_tick_state ({buffers; durable; tick_state; _} as pvm_state) = return ~durable Padding | _ when eval_has_finished tick_state -> (* We have an empty set of admin instructions, but need to wait until we can restart *) - let* has_stuck_flag = has_stuck_flag durable in - let* durable = - if has_stuck_flag then - Durable.(delete ~edit_readonly:true durable Constants.stuck_flag_key) - else Lwt.return durable - in - let* has_upgrade_error_flag = has_upgrade_error_flag durable in - let* durable = - if has_upgrade_error_flag then - Durable.( - delete ~edit_readonly:true durable Constants.upgrade_error_flag_key) - else Lwt.return durable - in + let* durable = patch_flags_on_eval_successful durable in return ~durable Padding | Eval {config; module_reg} -> (* Continue execution. *) diff --git a/src/lib_scoru_wasm/wasm_vm.mli b/src/lib_scoru_wasm/wasm_vm.mli index b06de110f95b..d5a482a26257 100644 --- a/src/lib_scoru_wasm/wasm_vm.mli +++ b/src/lib_scoru_wasm/wasm_vm.mli @@ -49,6 +49,11 @@ val compute_step_many_until : finished successfully. *) val eval_has_finished : tick_state -> bool +(** [patch_flags_on_eval_successful durable] clears flags set by + previous attempted runs of kernel_run. Once an evaluation has + succeeded, these can be safely deleted. *) +val patch_flags_on_eval_successful : Durable.t -> Durable.t Lwt.t + (** [should_compute pvm_state] probes whether it is possible to continue with more computational steps. *) val should_compute : pvm_state -> bool -- GitLab