From b761fbd016c0c2d1cf4cba6d1be0276a754a3f6a Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Thu, 20 Apr 2023 23:24:32 +0200 Subject: [PATCH 1/3] WASM/Debugger: Custom section parsing --- src/bin_wasm_debugger/custom_section.ml | 138 ++++++++++++++++++++ src/bin_wasm_debugger/main_wasm_debugger.ml | 26 +++- 2 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 src/bin_wasm_debugger/custom_section.ml diff --git a/src/bin_wasm_debugger/custom_section.ml b/src/bin_wasm_debugger/custom_section.ml new file mode 100644 index 000000000000..207d072749c5 --- /dev/null +++ b/src/bin_wasm_debugger/custom_section.ml @@ -0,0 +1,138 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2023 Nomadic Labs *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. 
*) +(* *) +(*****************************************************************************) + +(** + This file implements the parsing of custom sections, especially the `name` + custom section (see + https://webassembly.github.io/spec/core/appendix/custom.html#name-section). + + The `name` section has the following format: + [h] [len] [vec_len:n] ([index] [name_len] [name])^n + where + - [h] is a tag encoded in a single byte (`1` for the functions subsection) + - [len] is a variable-length unsigned 32-bit integer (`vu32`), which is + the length of the subsection + - [vec_len] (`vu32`) encoding the number of values in the vector + then for each value of the vector: + - [index] (`vu32`) encoding the function index + - [name_len] (`vu32`) encoding the length in bytes of the name + - [name] (`utf8`) bytes of length `name_len` encoding a UTF-8 + representation of the symbol + +*) + +(* Adapted from {Tezos_lib_webassembly.Decode} *) +let rec vuN n bytes index = + let b, next_index = (String.get bytes index |> Char.code, succ index) in + assert (n >= 7 || b land 0x7f < 1 lsl n) ; + let x = Int64.of_int (b land 0x7f) in + if b land 0x80 = 0 then (x, next_index) + else + let v, next_index = vuN (n - 7) bytes next_index in + (Int64.(logor x (shift_left v 7)), next_index) + +let vu32 bytes index = + let value, next_index = vuN 32 bytes index in + (Int64.to_int32 value, next_index) + +(** [parse_subsection_header bytes start] reads the tag for the subsection and + its length, and returns the next index to continue reading. Returns `None` + if there are not at least 2 bytes to read. *) +let parse_subsection_header bytes start = + (* At least two bytes: one for the tag, and at least one for the length of + the subsection.
*) + if String.length bytes < start + 2 then None + else + let len, next_index = vu32 bytes (start + 1) in + Some (String.get bytes start, len, next_index) + +let u32_to_int u = + match Int32.unsigned_to_int u with None -> assert false | Some i -> i + +(** [get_function_name_section_indexes bytes] returns the starting index of the + `functions` subsection and its length. *) +let get_function_name_section_indexes bytes = + let rec parse next_index = + match parse_subsection_header bytes next_index with + | None -> None + | Some ('\001', len, next_index) -> Some (next_index, len) + | Some (_, len, next_index) -> parse (next_index + u32_to_int len) + in + parse 0 + +(** [parse_nameassoc bytes start] parses a `(index, name)` encoded value and + returns the index to continue the reading. *) +let parse_nameassoc bytes start = + let idx, next_index = vu32 bytes start in + let name_len, start_index = vu32 bytes next_index in + let name_len = u32_to_int name_len in + let buffer = Buffer.create name_len in + let rec decode string index = + if index >= name_len + start_index then index + else + let uchar = String.get_utf_8_uchar string index in + (if Uchar.utf_decode_is_valid uchar then + let u = Uchar.utf_decode_uchar uchar in + if Uchar.is_char u then Buffer.add_char buffer (Uchar.to_char u)) ; + decode string (index + Uchar.utf_decode_length uchar) + in + let index = decode bytes start_index in + let name = Buffer.contents buffer in + ((idx, name), index) + +module FuncMap = Map.Make (Int32) + +(** [parse_vec bytes start parse_value] parses an encoded vector and its values + with [parse_value].
*) +let parse_vec bytes start parse_value = + let len, next_index = vu32 bytes start in + let len = u32_to_int len in + let rec parse_values index nth acc = + if nth >= len then acc + else + let value, next_index = parse_value bytes index in + parse_values next_index (succ nth) (Seq.cons value acc) + in + parse_values next_index 0 Seq.empty + +(** [parse_function_subsection subsection] parses and returns the `functions` + subsection, as described by the reference documentation. *) +let parse_function_subsection subsection = + match get_function_name_section_indexes subsection with + | None -> FuncMap.empty + | Some (start, _len) -> + parse_vec subsection start parse_nameassoc |> FuncMap.of_seq + +(** [pp_function_subsection ppf map] pretty-prints the parsed functions + subsection. *) +let pp_function_subsection ppf map = + let pp_assoc ppf (idx, name) = + Format.fprintf ppf " - func[%ld] <%s>" idx name + in + FuncMap.to_seq map + |> Format.pp_print_seq + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + pp_assoc + ppf diff --git a/src/bin_wasm_debugger/main_wasm_debugger.ml b/src/bin_wasm_debugger/main_wasm_debugger.ml index 2cef99a9a08d..ccbb9a0cb566 100644 --- a/src/bin_wasm_debugger/main_wasm_debugger.ml +++ b/src/bin_wasm_debugger/main_wasm_debugger.ml @@ -26,10 +26,24 @@ open Wasm_utils (* [parse_binary_module module_name module_stream] parses a binary encoded - module. Parsing outside of the PVM allows locations in case of errors. *) + module and its custom sections. Parsing outside of the PVM allows locations + in case of errors.
*) let parse_binary_module name module_ = + let open Lwt_syntax in let bytes = Tezos_lazy_containers.Chunked_byte_vector.of_string module_ in - Tezos_webassembly_interpreter.Decode.decode ~allow_floats:false ~name ~bytes + let* modl_ = + Tezos_webassembly_interpreter.Decode.decode ~allow_floats:false ~name ~bytes + in + let+ custom = + Tezos_webassembly_interpreter.Decode.decode_custom "name" ~name ~bytes + in + let functions_section = + List.map Custom_section.parse_function_subsection custom + |> List.fold_left + (Custom_section.FuncMap.merge (fun _ -> Option.either)) + Custom_section.FuncMap.empty + in + (modl_, functions_section) (* [typecheck_module module_ast] runs the typechecker on the module, which is not done by the PVM. *) @@ -61,10 +75,10 @@ let link module_ = let handle_module version binary name module_ = let open Lwt_result_syntax in let open Tezos_protocol_alpha.Protocol.Alpha_context.Sc_rollup in - let* ast = + let* ast, functions_section = Repl_helpers.trap_exn (fun () -> if binary then parse_binary_module name module_ - else Lwt.return (parse_module module_)) + else Lwt.return (parse_module module_, Custom_section.FuncMap.empty)) in let* () = typecheck_module ast in let* () = import_pvm_host_functions ~version () in @@ -79,7 +93,7 @@ let handle_module version binary name module_ = module_ in let*! 
tree = eval_until_input_requested tree in - return tree + return (tree, functions_section) let start version binary file = let open Lwt_result_syntax in @@ -196,7 +210,7 @@ let main_command = else if Filename.check_suffix wasm_file ".wast" then Ok false else error_with "Kernels should have .wasm or .wast file extension" in - let* tree = start version binary wasm_file in + let* tree, _ = start version binary wasm_file in let* inboxes = match inputs with | Some inputs -> Messages.parse_inboxes inputs config -- GitLab From cc67e60c0da9e65b05bd8c200ee17a3a2c3442c9 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Fri, 21 Apr 2023 11:20:33 +0200 Subject: [PATCH 2/3] WASM/Debugger: add `dump function symbols` command --- src/bin_wasm_debugger/commands.ml | 15 ++++++++++++++- src/bin_wasm_debugger/main_wasm_debugger.ml | 12 ++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/bin_wasm_debugger/commands.ml b/src/bin_wasm_debugger/commands.ml index c113f4c4b277..8fb098cfa971 100644 --- a/src/bin_wasm_debugger/commands.ml +++ b/src/bin_wasm_debugger/commands.ml @@ -45,6 +45,7 @@ type commands = | Show_subkeys of string | Show_key of string * printable_value_kind | Show_memory of int32 * int * printable_value_kind + | Dump_function_symbols | Step of eval_step | Load_inputs | Reveal_preimage of string option @@ -98,6 +99,7 @@ let parse_commands s = | Some kind -> Show_key (key, kind) | None -> Unknown s) | "show" :: "memory" :: rest -> parse_memory_commands s rest + | ["dump"; "function"; "symbols"] -> Dump_function_symbols | ["step"; step] -> ( match parse_eval_step step with Some s -> Step s | None -> Unknown s) | ["load"; "inputs"] -> Load_inputs @@ -508,6 +510,14 @@ let show_memory tree address length kind = state | exn -> Lwt_io.printf "Error: %s\n%!" 
(Printexc.to_string exn)) +type extra = {functions : string Custom_section.FuncMap.t} + +let dump_function_symbols extra = + let functions = + Format.asprintf "%a" Custom_section.pp_function_subsection extra.functions + in + Lwt_io.printf "Functions:\n%s\n" functions + (* [reveal_preimage config hex tree] checks the current state is waiting for a preimage, parses [hex] as an hexadecimal representation of the data or use the builtin if none is given, and does a reveal step. *) @@ -544,7 +554,7 @@ let reveal_metadata config tree = (* [handle_command command tree inboxes level] dispatches the commands to their actual implementation. *) -let handle_command c config tree inboxes level = +let handle_command c config extra tree inboxes level = let open Lwt_result_syntax in let command = parse_commands c in let return ?(tree = tree) ?(inboxes = inboxes) () = @@ -589,6 +599,9 @@ let handle_command c config tree inboxes level = | Show_memory (address, length, kind) -> let*! () = show_memory tree address length kind in return () + | Dump_function_symbols -> + let*! () = dump_function_symbols extra in + return () | Reveal_preimage bytes -> let*! tree = reveal_preimage config bytes tree in return ~tree () diff --git a/src/bin_wasm_debugger/main_wasm_debugger.ml b/src/bin_wasm_debugger/main_wasm_debugger.ml index ccbb9a0cb566..92fad5fdd9ef 100644 --- a/src/bin_wasm_debugger/main_wasm_debugger.ml +++ b/src/bin_wasm_debugger/main_wasm_debugger.ml @@ -75,7 +75,7 @@ let link module_ = let handle_module version binary name module_ = let open Lwt_result_syntax in let open Tezos_protocol_alpha.Protocol.Alpha_context.Sc_rollup in - let* ast, functions_section = + let* ast, functions = Repl_helpers.trap_exn (fun () -> if binary then parse_binary_module name module_ else Lwt.return (parse_module module_, Custom_section.FuncMap.empty)) @@ -93,7 +93,7 @@ let handle_module version binary name module_ = module_ in let*! 
tree = eval_until_input_requested tree in - return (tree, functions_section) + return (tree, Commands.{functions}) let start version binary file = let open Lwt_result_syntax in @@ -102,7 +102,7 @@ let start version binary file = handle_module version binary module_name buffer (* REPL main loop: reads an input, does something out of it, then loops. *) -let repl tree inboxes level config = +let repl tree inboxes level config extra = let open Lwt_result_syntax in let rec loop tree inboxes level = let*! () = Lwt_io.printf "> " in @@ -116,7 +116,7 @@ let repl tree inboxes level config = match input with | Some command -> let* tree, inboxes, level = - Commands.handle_command command config tree inboxes level + Commands.handle_command command config extra tree inboxes level in loop tree inboxes level | None -> return tree @@ -210,13 +210,13 @@ let main_command = else if Filename.check_suffix wasm_file ".wast" then Ok false else error_with "Kernels should have .wasm or .wast file extension" in - let* tree, _ = start version binary wasm_file in + let* tree, extra = start version binary wasm_file in let* inboxes = match inputs with | Some inputs -> Messages.parse_inboxes inputs config | None -> return [] in - let+ _tree = repl tree inboxes 0l config in + let+ _tree = repl tree inboxes 0l config extra in ()) (* List of program commands *) -- GitLab From 8f5358a945d020aaa86c6c13e5c006d888beab74 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Fri, 21 Apr 2023 12:03:15 +0200 Subject: [PATCH 3/3] WASM/Debugger: Update Changelog for `dump function symbols` --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index fe986fe5ec80..dbc762391337 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -188,6 +188,9 @@ Smart Rollup WASM Debugger - Automatically ``load inputs`` when ``step inbox`` is called. 
(MR :gl:`!8444`) +- Added a command ``dump function symbols`` to inspect the custom section + ``name`` of unstripped kernels (MR :gl:`!8522`) + Miscellaneous ------------- -- GitLab