diff --git a/src/lib_webassembly/bin/text/arrange.ml b/src/lib_webassembly/bin/text/arrange.ml index 3446112b9bb7c72226b540c3333dc1f764068ffb..3273a8a53c26235f069862d94c50e956919a6cc2 100644 --- a/src/lib_webassembly/bin/text/arrange.ml +++ b/src/lib_webassembly/bin/text/arrange.ml @@ -604,7 +604,7 @@ let elem i seg = let data i seg = let open Lwt.Syntax in let {dinit; dmode} = seg.it in - let+ dinit = Chunked_byte_vector.Lwt.Buffer.to_string_unstable dinit in + let+ dinit = Chunked_byte_vector.Lwt.to_string dinit in Node ("data $" ^ nat i, segment_mode "memory" dmode @ break_bytes dinit) (* Modules *) diff --git a/src/lib_webassembly/bin/text/parser.mly b/src/lib_webassembly/bin/text/parser.mly index 7d1288bcbafb60fbe44b1706cbe6f849ccd15329..9169bbb0ca2e35fb4a3cc9a50e0fe1b1ee24cf0f 100644 --- a/src/lib_webassembly/bin/text/parser.mly +++ b/src/lib_webassembly/bin/text/parser.mly @@ -980,17 +980,17 @@ data : | LPAR DATA bind_var_opt string_list RPAR { let at = at () in fun c -> ignore ($3 c anon_data bind_data); - fun () -> {dinit = Chunked_byte_vector.Lwt.Buffer.of_string $4; dmode = Passive @@ at} @@ at } + fun () -> {dinit = Chunked_byte_vector.Lwt.of_string $4; dmode = Passive @@ at} @@ at } | LPAR DATA bind_var_opt memory_use offset string_list RPAR { let at = at () in fun c -> ignore ($3 c anon_data bind_data); fun () -> - {dinit = Chunked_byte_vector.Lwt.Buffer.of_string $6; dmode = Active {index = $4 c memory; offset = $5 c} @@ at} @@ at } + {dinit = Chunked_byte_vector.Lwt.of_string $6; dmode = Active {index = $4 c memory; offset = $5 c} @@ at} @@ at } | LPAR DATA bind_var_opt offset string_list RPAR /* Sugar */ { let at = at () in fun c -> ignore ($3 c anon_data bind_data); fun () -> - {dinit = Chunked_byte_vector.Lwt.Buffer.of_string $5; dmode = Active {index = 0l @@ at; offset = $4 c} @@ at} @@ at } + {dinit = Chunked_byte_vector.Lwt.of_string $5; dmode = Active {index = 0l @@ at; offset = $4 c} @@ at} @@ at } memory : | LPAR MEMORY bind_var_opt memory_fields RPAR @@ -1014,7 +1014,7 @@ memory_fields : let offset = [i32_const (0l @@ at) @@ at] @@ at in let size = Int32.(div (add (of_int (String.length $3)) 65535l) 65536l) in [{mtype = MemoryType {min = size; max = Some size}} @@ at], - [{dinit = Chunked_byte_vector.Lwt.Buffer.of_string $3; dmode = Active {index = x; offset} @@ at} @@ at], + [{dinit = Chunked_byte_vector.Lwt.of_string $3; dmode = Active {index = x; offset} @@ at} @@ at], [], [] } global : diff --git a/src/lib_webassembly/binary/decode.ml b/src/lib_webassembly/binary/decode.ml index f928b98ec951fe5e81f66e052772748fa9a630b5..29ff79afe09843f357ae2fec51d3e07802c9f9de 100644 --- a/src/lib_webassembly/binary/decode.ml +++ b/src/lib_webassembly/binary/decode.ml @@ -161,26 +161,24 @@ let sized f s = (** Incremental chunked byte vector creation (from implicit input). *) type byte_vector_kont = | VKStart (** Initial step. *) - | VKRead of Chunked_byte_vector.Lwt.Buffer.t * int * int + | VKRead of Chunked_byte_vector.Lwt.t * int64 * int64 (** Reading step, containing the current position in the string and the length, reading byte per byte. *) - | VKStop of Chunked_byte_vector.Lwt.Buffer.t - (** Final step, cannot reduce. *) + | VKStop of Chunked_byte_vector.Lwt.t (** Final step, cannot reduce. *) let byte_vector_step s = let open Lwt.Syntax in function | VKStart -> - let len = len32 s in - let vector = - len |> Int64.of_int |> Chunked_byte_vector.Lwt.Buffer.create - in - VKRead (vector, 0, len) |> Lwt.return - | VKRead (vector, index, len) when index >= len -> VKStop vector |> Lwt.return + let len = len32 s |> Int64.of_int in + let vector = Chunked_byte_vector.Lwt.create len in + VKRead (vector, 0L, len) |> Lwt.return + | VKRead (vector, index, len) when Int64.compare index len >= 0 -> + VKStop vector |> Lwt.return | VKRead (vector, index, len) -> let c = get s in - let+ vector = Chunked_byte_vector.Lwt.Buffer.add_byte vector c in - VKRead (vector, index + 1, len) + let+ () = Chunked_byte_vector.Lwt.store_byte vector index c in + VKRead (vector, Int64.succ index, len) (* Final step, cannot reduce *) | VKStop vector -> assert false diff --git a/src/lib_webassembly/binary/decode.mli b/src/lib_webassembly/binary/decode.mli index 9842770a38764a8b081b5c87b0ba4730d15325e3..9fda66ccf00446f44ba140e64ebea89aaf67847e 100644 --- a/src/lib_webassembly/binary/decode.mli +++ b/src/lib_webassembly/binary/decode.mli @@ -38,11 +38,10 @@ type size = {size : int; start : pos} (** Incremental chunked byte vector creation (from implicit input). *) type byte_vector_kont = | VKStart (** Initial step. *) - | VKRead of Chunked_byte_vector.Lwt.Buffer.t * pos * int + | VKRead of Chunked_byte_vector.Lwt.t * int64 * int64 (** Reading step, containing the current position in the string and the length, reading byte per byte. *) - | VKStop of Chunked_byte_vector.Lwt.Buffer.t - (** Final step, cannot reduce. *) + | VKStop of Chunked_byte_vector.Lwt.t (** Final step, cannot reduce. *) type name_step = | NKStart (** UTF8 name starting point. *) diff --git a/src/lib_webassembly/binary/encode.ml b/src/lib_webassembly/binary/encode.ml index 718ae00d5f54ae69f6b26de6a7d4536bd0d8e3da..931951db658b3d9d00b7962ab858e9a687e3ecde 100644 --- a/src/lib_webassembly/binary/encode.ml +++ b/src/lib_webassembly/binary/encode.ml @@ -1128,7 +1128,7 @@ struct let data seg = let open Lwt.Syntax in let {dinit; dmode} = seg.it in - let+ dinit = Chunked_byte_vector.Lwt.Buffer.to_string_unstable dinit in + let+ dinit = Chunked_byte_vector.Lwt.to_string dinit in match dmode.it with | Passive -> vu32 0x01l ; diff --git a/src/lib_webassembly/exec/eval.ml b/src/lib_webassembly/exec/eval.ml index 44d80d412f65a517325d38ad4f61f74cd992144a..612d060224b3c664dc36a8ca27e9b96716a879dd 100644 --- a/src/lib_webassembly/exec/eval.ml +++ b/src/lib_webassembly/exec/eval.ml @@ -879,8 +879,7 @@ let create_elem (inst : module_inst) (seg : elem_segment) : elem_inst Lwt.t = let create_data (inst : module_inst) (seg : data_segment) : data_inst Lwt.t = let {dinit; _} = seg.it in - let data = Chunked_byte_vector.Lwt.Buffer.to_byte_vector dinit in - Lwt.return (ref data) + Lwt.return (ref dinit) let add_import (m : module_) (ext : extern) (im : import) (inst : module_inst) : module_inst Lwt.t = @@ -940,8 +939,7 @@ let run_data i data = Const (I32 (Int32.of_int - (Int64.to_int - (Chunked_byte_vector.Lwt.Buffer.length data.it.dinit))) + (Int64.to_int (Chunked_byte_vector.Lwt.length data.it.dinit))) @@ at) @@ at; MemoryInit x @@ at; diff --git a/src/lib_webassembly/syntax/ast.ml b/src/lib_webassembly/syntax/ast.ml index 63305fd9cf6d5a709145317a7f91504fef9c8db4..c64840b797d4494dbc2879fa3026526b1ae23fdd 100644 --- a/src/lib_webassembly/syntax/ast.ml +++ b/src/lib_webassembly/syntax/ast.ml @@ -367,10 +367,7 @@ and elem_segment' = { type data_segment = data_segment' Source.phrase -and data_segment' = { - dinit : Chunked_byte_vector.Lwt.Buffer.t; - dmode : segment_mode; -} +and data_segment' = {dinit : Chunked_byte_vector.Lwt.t; dmode : segment_mode} (* Modules *) diff --git a/src/lib_webassembly/util/chunked_byte_vector.ml b/src/lib_webassembly/util/chunked_byte_vector.ml index 92065a83bfd38b40d38fdf9dccada71f09dad670..46491d79be678d6f9a5fe90fcba1f9c0f1608383 100644 --- a/src/lib_webassembly/util/chunked_byte_vector.ml +++ b/src/lib_webassembly/util/chunked_byte_vector.ml @@ -76,6 +76,10 @@ module type S = sig val of_bytes : bytes -> t effect + val to_string : t -> string effect + + val to_bytes : t -> bytes effect + val grow : t -> int64 -> unit val length : t -> int64 @@ -87,24 +91,6 @@ module type S = sig val store_bytes : t -> int64 -> bytes -> unit effect val loaded_chunks : t -> (int64 * Chunk.t) list - - module Buffer : sig - type vector := t - - type t - - val create : Int64.t -> t - - val length : t -> int64 - - val add_byte : t -> int -> t effect - - val of_string : string -> t - - val to_string_unstable : t -> string effect - - val to_byte_vector : t -> vector - end end module Make (Effect : Effect.S) : S with type 'a effect = 'a Effect.t = struct @@ -194,48 +180,48 @@ module Make (Effect : Effect.S) : S with type 'a effect = 'a Effect.t = struct let+ () = store_bytes vector 0L bytes in vector - let loaded_chunks vector = - Vector.Vector.loaded_bindings (Vector.snapshot vector.chunks) - - module Buffer = struct - type nonrec t = {vector : t; offset : int64} - - let length {vector; _} = length vector - - let add_byte {vector; offset} b = - let open Effect in - let+ () = store_byte vector offset b in - {vector; offset = Int64.succ offset} - - let of_string str = - let offset = String.length str |> Int64.of_int in - let vector = of_string str in - {vector; offset} - - let create length = {vector = create length; offset = 0L} - - (* This function makes a lot of conversion from Int64 to native int but it - should be called only when converting a parsed data segment into a string - (when writing a parsed module into its binary or text representation). - - @raise Invalid_argument "Chunked_byte.vector.to_string" if the size of the - vector is greater than [Sys.max_string_length]. *) - let to_string_unstable {vector; offset} = - let open Effect in - if offset > Int64.of_int Sys.max_string_length then - invalid_arg "Chunked_byte_vector.to_string" - else - let buff = Bytes.create (Int64.to_int offset) in - let+ () = - List.init (Int64.to_int offset) (fun i -> - let+ b = load_byte vector (Int64.of_int i) in - Bytes.set buff i (Char.chr b)) - |> join + let to_bytes vector = + let open Effect in + let chunks_number = Vector.num_elements vector.chunks in + if vector.length > Int64.of_int Sys.max_string_length then + raise Memory_exn.Bounds ; + (* Once we ensure the vector can be contained in a string, we can safely + convert everything to int, since the size of the vector is contained in + a `nativeint`. See {!of_string} comment. *) + let buffer = Bytes.create (Int64.to_int vector.length) in + let add_chunk index chunk = + let rem = + (* The last chunk (at `length - 1`) is not necessarily of size + [Chunk.size], i.e. if the length of the chunked_byte_vector is not a + multiple of [Chunk.size]. *) + if index >= Int64.pred chunks_number then + Int64.rem vector.length Chunk.size + else Chunk.size + in + for offset = 0 to Int64.to_int rem - 1 do + let address = + Chunk.address ~index ~offset:(Int64.of_int offset) |> Int64.to_int in - Bytes.to_string buff + Bytes.set buffer address (Char.chr @@ Array1.get chunk offset) + done + in + let rec fold index = + if index >= chunks_number then Effect.return () + else + let* chunk = Vector.get index vector.chunks in + add_chunk index chunk ; + fold (Int64.succ index) + in + let+ () = fold 0L in + buffer - let to_byte_vector {vector; _} = vector - end + let to_string vector = + let open Effect in + let+ buffer = to_bytes vector in + Bytes.to_string buffer + + let loaded_chunks vector = + Vector.Vector.loaded_bindings (Vector.snapshot vector.chunks) end include Make (Effect.Identity) diff --git a/src/lib_webassembly/util/chunked_byte_vector.mli b/src/lib_webassembly/util/chunked_byte_vector.mli index 630c31a6e54449d1f5db96efc0b665d5c2e025a1..eac710773f7f06b9e9e9ada9ad034dc2dda86549 100644 --- a/src/lib_webassembly/util/chunked_byte_vector.mli +++ b/src/lib_webassembly/util/chunked_byte_vector.mli @@ -47,6 +47,12 @@ module type S = sig turning your [bytes] into a [string] would be potentially expensive. *) val of_bytes : bytes -> t effect + (** [to_string vector] creates a string from the given [vector]. *) + val to_string : t -> string effect + + (** [to_bytes vector] creates a bytes from the given [vector]. *) + val to_bytes : t -> bytes effect + (** [grow vector length_delta] increases the byte vector length by [length_delta]. *) val grow : t -> int64 -> unit @@ -68,42 +74,6 @@ module type S = sig been cached in-memory since [vector] has been created, either by reading its contents, or by modifying it. *) val loaded_chunks : t -> (int64 * Chunk.t) list - - module Buffer : sig - type vector := t - - (** Type of buffer using an underlying chunked byte vector. *) - type t - - (** [create length] creates a buffer that has capacity for [length] bytes. *) - val create : Int64.t -> t - - (** [length buffer] returns the length of [buffer] in bytes. *) - val length : t -> int64 - - (** [add_byte buffer byte] set the next byte in the buffer [buffer] to [byte]. *) - val add_byte : t -> int -> t effect - - (** [of_string str] creates a chunked byte vector from the given [str]. *) - val of_string : string -> t - - (** [to_string_unstable buffer] creates a string from the given buffer - [buffer]. - - This function should never be called after converting a buffer to a byte - vector, since the byte vector is only dereferenced and never copied. See - {!to_byte_vector}. - - @raise [Invalid_argument "Chunked_byte.vector.to_string_unstable"] if the - size of the vector is greater than [Sys.max_string_length]. *) - val to_string_unstable : t -> string effect - - (** [to_byte_vector buffer] returns the underlying byte vector from the buffer - [buffer]. Note that it only dereferences the byte vector, not making a - copy. As such, it is unsafe to continue to add bytes to the buffer since - it will affect the byte vector. *) - val to_byte_vector : t -> vector - end end include S with type 'a effect = 'a