diff --git a/manifest/main.ml b/manifest/main.ml index 01d8d3578c1397dd1623c96ddedfb827358a2a97..78dc723895e7b5993636d5d1c9030cf6eec2da94 100644 --- a/manifest/main.ml +++ b/manifest/main.ml @@ -1813,7 +1813,12 @@ let octez_base_p2p_identity_file = let _octez_base_tests = tezt [ - "test_bounded"; "test_time"; "test_protocol"; "test_p2p_addr"; "test_sized"; + "test_bounded"; + "test_time"; + "test_protocol"; + "test_p2p_addr"; + "test_sized"; + "test_skip_list"; ] ~path:"src/lib_base/test" ~opam:"tezos-base" diff --git a/src/lib_base/skip_list.ml b/src/lib_base/skip_list.ml new file mode 100644 index 0000000000000000000000000000000000000000..928a4514f99513de6dda51a0915c56af5c4a5a7c --- /dev/null +++ b/src/lib_base/skip_list.ml @@ -0,0 +1,583 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Nomadic Labs *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. *) +(* *) +(*****************************************************************************) + +module type MONAD = sig + type 'a t + + val bind : 'a t -> ('a -> 'b t) -> 'b t + + val return : 'a -> 'a t +end + +module type S = sig + type ('content, 'ptr) cell + + val pp : + pp_ptr:(Format.formatter -> 'ptr -> unit) -> + pp_content:(Format.formatter -> 'content -> unit) -> + Format.formatter -> + ('content, 'ptr) cell -> + unit + + val equal : + ('ptr -> 'ptr -> bool) -> + ('content -> 'content -> bool) -> + ('content, 'ptr) cell -> + ('content, 'ptr) cell -> + bool + + val encoding : + 'ptr Data_encoding.t -> + 'content Data_encoding.t -> + ('content, 'ptr) cell Data_encoding.t + + val index : (_, _) cell -> Z.t + + val content : ('content, 'ptr) cell -> 'content + + val back_pointer : ('content, 'ptr) cell -> int -> 'ptr option + + val back_pointers : ('content, 'ptr) cell -> 'ptr list + + val genesis : 'content -> ('content, 'ptr) cell + + val next : + prev_cell:('content, 'ptr) cell -> + prev_cell_ptr:'ptr -> + 'content -> + ('content, 'ptr) cell + + type ('ptr, 'content) search_cell_result = + | Found of ('ptr, 'content) cell + | Nearest of { + lower : ('ptr, 'content) cell; + upper : ('ptr, 'content) cell option; + } + | No_exact_or_lower_ptr + | Deref_returned_none + + type ('ptr, 'content) search_result = { + rev_path : ('ptr, 'content) cell list; + last_cell : ('ptr, 'content) search_cell_result; + } + + val pp_search_result : + pp_cell:(Format.formatter -> ('ptr, 'content) cell -> unit) -> + Format.formatter -> + ('ptr, 'content) search_result -> + unit + + module type MONADIC = sig + type 'a result + + val find : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + ('content, 'ptr) cell option result + + val back_path : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + 'ptr list option result + + val valid_back_path : + equal_ptr:('ptr -> 'ptr -> bool) -> + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_ptr:'ptr -> + 'ptr list -> + bool result + + val search : + deref:('ptr -> ('content, 'ptr) cell option result) -> + compare:('content -> int) -> + cell:('content, 'ptr) cell -> + ('content, 'ptr) search_result result + end + + include MONADIC with type 'a result := 'a + + module Lwt : MONADIC with type 'a result := 'a Lwt.t + + module Make_monadic (M : MONAD) : MONADIC with type 'a result := 'a M.t +end + +module Make (Parameters : sig + val basis : int +end) : S = struct + let () = assert (Compare.Int.(Parameters.basis >= 2)) + + open Parameters + + (* + + A cell of a skip list with some [`content] and back pointers of + type [`ptr]. + + Invariants + ---------- + + - back_pointers[i] + = Some (pointer to (index - (index mod (basis ** i)) - 1)) + (for all i < length back_pointers) + - length back_pointers = log basis index + + Notes + ----- + + - The [index] field is not strictly required but helps in making + the data structure more robust. Indeed, otherwise, we should + also ask the client to provide the index of the cell to be + built, which can be error-prone. + + - The back pointers of a cell are chosen from the back pointers of + its predecessor (except for the genesis cell) and a pointer to this + predecessor. This locality makes the insertion of new cell very + efficient in practice. + + *) + type ('content, 'ptr) cell = { + content : 'content; + back_pointers : 'ptr option FallbackArray.t; + index : Z.t; + } + + let equal equal_ptr equal_content cell1 cell2 = + let equal_back_pointers b1 b2 = + let open FallbackArray in + Compare.Int.(length b1 = length b2) + && fst + @@ fold + (fun (equal, i) h1 -> + (equal && Option.equal equal_ptr h1 (get b2 i), i + 1)) + b1 + (true, 0) + in + let {content; back_pointers; index} = cell1 in + equal_content content cell2.content + && Compare.Z.equal index cell2.index + && equal_back_pointers back_pointers cell2.back_pointers + + let index cell = cell.index + + let back_pointers_to_list a = + FallbackArray.fold + (fun l -> function + | Some ptr -> ptr :: l + | None -> (* By [cell] invariants. *) assert false) + a + [] + |> List.rev + + let pp ~pp_ptr ~pp_content fmt {content; back_pointers; index} = + Format.fprintf + fmt + "content: %a@,index: %s@,@[back_pointers:@ %a@]" + pp_content + content + (Z.to_string index) + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.pp_print_string fmt "; ") + pp_ptr) + (back_pointers_to_list back_pointers) + + let encoding ptr_encoding content_encoding = + let of_list = + FallbackArray.of_list ~fallback:None ~proj:(fun c -> Some c) + in + let to_list = back_pointers_to_list in + let open Data_encoding in + conv + (fun {index; content; back_pointers} -> + (index, content, to_list back_pointers)) + (fun (index, content, back_pointers) -> + {index; content; back_pointers = of_list back_pointers}) + (obj3 + (req "index" n) + (req "content" content_encoding) + (req "back_pointers" (list ptr_encoding))) + + let content cell = cell.content + + let back_pointers cell = back_pointers_to_list cell.back_pointers + + let genesis content = + {index = Z.zero; content; back_pointers = FallbackArray.make 0 None} + + let back_pointer cell i = FallbackArray.get cell.back_pointers i + + (* Precondition: i < length cell.back_pointers *) + let back_pointer_unsafe cell i = + match FallbackArray.get cell.back_pointers i with + | Some ptr -> ptr + | None -> (* By precondition and invariants of cells. *) assert false + + let next ~prev_cell ~prev_cell_ptr content = + let index = Z.succ prev_cell.index in + let back_pointers = + let rec aux power accu i = + if Compare.Z.(index < power) then List.rev accu + else + let back_pointer_i = + if Compare.Z.(Z.rem index power = Z.zero) then prev_cell_ptr + else + (* The following call is valid because of + - [i < List.length prev_cell.back_pointer] + because [log_basis index = log_basis prev_cell.index] + - the invariants of [prev_cell] *) + back_pointer_unsafe prev_cell i + in + let accu = back_pointer_i :: accu in + aux Z.(mul power (of_int basis)) accu (i + 1) + in + aux Z.one [] 0 + in + let back_pointers = + FallbackArray.of_list ~fallback:None ~proj:Option.some back_pointers + in + {index; content; back_pointers} + + (* returns the array of [basis^i] forall [i < len (back_pointers cell)] *) + let list_powers cell = + let rec aux n prev p = + if Compare.Int.(n <= 0) then List.rev p + else aux (n - 1) (basis * prev) (prev :: p) + in + FallbackArray.of_list + ~fallback:0 + ~proj:(fun x -> x) + (aux (FallbackArray.length cell.back_pointers) 1 []) + + (* + [back_pointers] are sorted in decreasing order of their pointing cell index + in the list. So we can do a [binary_search] to find the [cell] with the + smallest index that is greater than [target] in the list. + + More formally, min({c : cell | c.index >= target.index}) where [c] is one of + the pointed cells in the array of back pointers of the [cell] parameter. + *) + let best_skip cell target_index powers = + let open FallbackArray in + let pointed_cell_index i = + Z.(pred @@ sub cell.index (rem cell.index (of_int (get powers i)))) + in + (* cell.index - (cell.index mod get powers i) - 1 in *) + let rec binary_search start_idx end_idx = + if Compare.Int.(start_idx >= end_idx) then Some start_idx + else + let mid_idx = start_idx + ((end_idx - start_idx) / 2) in + let mid_cell_index = pointed_cell_index mid_idx in + if Compare.Z.(mid_cell_index = target_index) then Some mid_idx + else if Compare.Z.(mid_cell_index < target_index) then + binary_search start_idx (mid_idx - 1) + else + let prev_mid_cell_index = pointed_cell_index (mid_idx + 1) in + if Compare.Z.(prev_mid_cell_index = target_index) then + Some (mid_idx + 1) + else if Compare.Z.(prev_mid_cell_index < target_index) then + (* + If (mid_cell_index > target_index) && + (prev_mid_cell_index < target_index) + then we found the closest cell to the target, which is mid_cell, + so we return its index [mid_idx] in the array of back_pointers. + *) + Some mid_idx + else binary_search (mid_idx + 1) end_idx + in + binary_search 0 (length cell.back_pointers - 1) + + type ('ptr, 'content) search_cell_result = + | Found of ('ptr, 'content) cell + | Nearest of { + lower : ('ptr, 'content) cell; + upper : ('ptr, 'content) cell option; + } + | No_exact_or_lower_ptr + | Deref_returned_none + + type ('ptr, 'content) search_result = { + rev_path : ('ptr, 'content) cell list; + last_cell : ('ptr, 'content) search_cell_result; + } + + let pp_rev_path ~pp_cell = + Format.pp_print_list ~pp_sep:Format.pp_print_space pp_cell + + let pp_search_cell_result ~pp_cell fmt = function + | Found cell -> Format.fprintf fmt "Found(%a)" pp_cell cell + | Nearest {lower; upper} -> + Format.fprintf + fmt + "Nearest(lower=%a;upper=%a)" + pp_cell + lower + (Format.pp_print_option pp_cell) + upper + | No_exact_or_lower_ptr -> Format.fprintf fmt "No_exact_or_lower_ptr" + | Deref_returned_none -> Format.fprintf fmt "Deref_returned_none" + + let pp_search_result ~pp_cell fmt {rev_path; last_cell} = + Format.fprintf + fmt + "{rev_path = %a; last_point = %a}" + (pp_rev_path ~pp_cell) + rev_path + (pp_search_cell_result ~pp_cell) + last_cell + + module type MONADIC = sig + type 'a result + + val find : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + ('content, 'ptr) cell option result + + val back_path : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + 'ptr list option result + + val valid_back_path : + equal_ptr:('ptr -> 'ptr -> bool) -> + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_ptr:'ptr -> + 'ptr list -> + bool result + + val search : + deref:('ptr -> ('content, 'ptr) cell option result) -> + compare:('content -> int) -> + cell:('content, 'ptr) cell -> + ('content, 'ptr) search_result result + end + + module Make_monadic (M : MONAD) : MONADIC with type 'a result := 'a M.t = + struct + module Monad_syntax = struct + include M + + let ( let* ) = bind + + module Option = struct + let (return [@ocaml.inline "always"]) = fun x -> M.return (Some x) + + let ( let* ) lo f = + M.bind lo (function None -> M.return None | Some x -> f x) + + let ( let*? ) o f = match o with Some x -> f x | None -> M.return None + end + end + + let rev_back_path ~deref ~cell_ptr ~target_index = + let open Monad_syntax.Option in + let* cell = deref cell_ptr in + let powers = list_powers cell in + let rec aux path ptr = + let path = ptr :: path in + let* cell = deref ptr in + let index = cell.index in + if Compare.Z.(target_index = index) then return path + else if Compare.Z.(target_index > index) then M.return None + else + let*? best_idx = best_skip cell target_index powers in + let*? ptr = back_pointer cell best_idx in + aux path ptr + in + aux [] cell_ptr + + let find ~deref ~cell_ptr ~target_index = + let open Monad_syntax.Option in + let* rev_back_path = rev_back_path ~deref ~cell_ptr ~target_index in + let*? cell_ptr = List.hd rev_back_path in + deref cell_ptr + + let back_path ~deref ~cell_ptr ~target_index = + let open Monad_syntax.Option in + let* rev_back_path = rev_back_path ~deref ~cell_ptr ~target_index in + return (List.rev rev_back_path) + + let mem equal x l = + let open FallbackArray in + let n = length l in + let rec aux idx = + if Compare.Int.(idx >= n) then false + else + match get l idx with + | None -> aux (idx + 1) + | Some y -> if equal x y then true else aux (idx + 1) + in + aux 0 + + let assume_some o f = + let open Monad_syntax in + let* o in + match o with None -> return false | Some x -> f x + + let valid_back_path ~equal_ptr ~deref ~cell_ptr ~target_ptr path = + let open Monad_syntax in + assume_some (deref target_ptr) @@ fun target -> + assume_some (deref cell_ptr) @@ fun cell -> + let target_index = index target + and cell_index = index cell + and powers = list_powers cell in + let rec valid_path index cell_ptr path = + match (cell_ptr, path) with + | final_cell, [] -> + return + (equal_ptr target_ptr final_cell + && Compare.Z.(index = target_index)) + | cell_ptr, cell_ptr' :: path -> + assume_some (deref cell_ptr) @@ fun cell -> + assume_some (deref cell_ptr') @@ fun cell' -> + if mem equal_ptr cell_ptr' cell.back_pointers then + assume_some (return @@ best_skip cell target_index powers) + @@ fun best_idx -> + assume_some (return @@ back_pointer cell best_idx) + @@ fun best_ptr -> + let minimal = equal_ptr best_ptr cell_ptr' in + let index' = cell'.index in + if minimal then valid_path index' cell_ptr' path else return false + else return false + in + match path with + | [] -> return false + | first_cell_ptr :: path -> + if equal_ptr first_cell_ptr cell_ptr then + valid_path cell_index cell_ptr path + else return false + + let search (type ptr) ~(deref : ptr -> ('content, ptr) cell option M.t) + ~compare ~cell = + let open Monad_syntax in + let ( = ), ( < ), ( > ) = Compare.Int.(( = ), ( < ), ( > )) in + (* Given a cell, to compute the minimal path, we need to find the + good back-pointer. This is done linearly with respect to the + number of back-pointers. This number of back-pointers is + logarithmic with respect to the number of non-empty + inboxes. The complexity is consequently in O(log_2^2(n)). Since + in practice, [n < 2^32], we have at most [1000] calls. Besides, + the recursive function is tail recursive. + + The linear search could be turned into a dichotomy search if + necessary. But since this piece of code won't be used in a + carbonated function, we prefer to keep a simple implementation + for the moment. *) + let rec aux rev_path cell ix = + (* Below, we call the [target] the cell for which [compare target = 0]. *) + + (* Invariant: + + - compare cell > target + - ix >= 0 + - if cell <> genesis => ix < List.length (back_pointers cell) + - \exists path' rev_path = cell:path' + *) + let back_pointers_length = FallbackArray.length cell.back_pointers in + if back_pointers_length = 0 then + (* [cell] is the genesis cell. *) + return {rev_path; last_cell = No_exact_or_lower_ptr} + else + let candidate_ptr = + match back_pointer cell ix with + | None -> + (* At this point we have [cell <> genesis]. Consequently, + thanks to the invariant of this function, we have [ix + < List.length (back_pointers cell)]. Consequently, the + call to [back_pointer] cannot fail. *) + assert false + | Some candidate_ptr -> candidate_ptr + in + let* derefed = deref candidate_ptr in + match derefed with + | None -> + (* If we cannot dereference a pointer, We stop the search + and returns the current path. *) + return {rev_path; last_cell = Deref_returned_none} + | Some next_cell -> ( + let comparison = compare next_cell.content in + if comparison = 0 then + (* We have found the target.*) + let rev_path = next_cell :: rev_path in + return {rev_path; last_cell = Found next_cell} + else if comparison > 0 then + if ix < back_pointers_length - 1 then + (* There might be a short path by dereferencing the next pointer. *) + aux rev_path cell (ix + 1) + else + (* The last pointer is still above the target. We are on the good track, *) + let rev_path = next_cell :: rev_path in + aux rev_path next_cell 0 + else if ix = 0 then + (* We found a cell lower than the target. *) + (* The first back pointers gives a cell below the target *) + let rev_path = next_cell :: rev_path in + return + { + rev_path; + last_cell = Nearest {lower = next_cell; upper = Some cell}; + } + else + (* We found a cell lower than the target. *) + (* The previous pointer was actually the good one. *) + let good_candidate_ptr = + match back_pointer cell (ix - 1) with + | None -> assert false + | Some candidate_ptr -> candidate_ptr + in + let* derefed = deref good_candidate_ptr in + match derefed with + | None -> + (* We already dereferenced this pointer before. *) + assert false + | Some good_next_cell -> + let rev_path = good_next_cell :: rev_path in + aux rev_path good_next_cell 0) + in + let comparison = compare cell.content in + if Compare.Int.(comparison = 0) then + (* Particular case where the target is the start cell. *) + return {rev_path = [cell]; last_cell = Found cell} + else if Compare.Int.(comparison < 0) then + return + {rev_path = [cell]; last_cell = Nearest {lower = cell; upper = None}} + else aux [cell] cell 0 + end + + include Make_monadic (struct + type 'a t = 'a + + let (bind [@ocaml.inline "always"]) = ( |> ) + + let[@ocaml.inline always] return x = x + end) + + module Lwt = Make_monadic (Lwt) +end diff --git a/src/lib_base/skip_list.mli b/src/lib_base/skip_list.mli new file mode 100644 index 0000000000000000000000000000000000000000..3288eb2ff4100298a816a374d6bb3ed7acfd0fea --- /dev/null +++ b/src/lib_base/skip_list.mli @@ -0,0 +1,226 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Nomadic Labs *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. *) +(* *) +(*****************************************************************************) + +(** This module provides an implementation of the skip list data structure. *) + +(** Basic signature for a monad. *) +module type MONAD = sig + type 'a t + + val bind : 'a t -> ('a -> 'b t) -> 'b t + + val return : 'a -> 'a t +end + +(** A skip list represents a sequence of values. There are three main + differences between these [skip list]s and OCaml standard [list]s: + + 1. A skip list cannot be empty. + + 2. A skip list grows at its end. + + 3. Each cell of the skip list provides several back pointers + allowing to *skip* chunk of ancestors of the sequence to directly + jump to a given position. More precisely, given a [basis] + parameter, the i-th back pointers of element number [n] in the + sequence points to [n - n mod basis^i - 1]. The element number [n] + in the sequence contains [log_basis n] back pointers. + + The skip list is defined by a pair of dereferencing function + of type ['ptr -> ('content, 'ptr) cell] and the last cell + of the sequence. The maintainance of this pair is left to the client. + In particular, the client is responsible to correctly bind a cell + to each back pointers reachable from the last cell. + +*) +module type S = sig + (** A cell in the skip list carrying a given ['content] and back + pointers of type ['ptr]. *) + type ('content, 'ptr) cell + + val pp : + pp_ptr:(Format.formatter -> 'ptr -> unit) -> + pp_content:(Format.formatter -> 'content -> unit) -> + Format.formatter -> + ('content, 'ptr) cell -> + unit + + val equal : + ('ptr -> 'ptr -> bool) -> + ('content -> 'content -> bool) -> + ('content, 'ptr) cell -> + ('content, 'ptr) cell -> + bool + + val encoding : + 'ptr Data_encoding.t -> + 'content Data_encoding.t -> + ('content, 'ptr) cell Data_encoding.t + + (** [index cell] returns the position of [cell] in the sequence. *) + val index : (_, _) cell -> Z.t + + (** [content cell] is the content carried by the [cell]. *) + val content : ('content, 'ptr) cell -> 'content + + (** [back_pointer cell i] returns [Some ptr] if [ptr] is the + [i]-th back pointer of [cell]. Returns [None] if the cell + contains less than [i + 1] back pointers. *) + val back_pointer : ('content, 'ptr) cell -> int -> 'ptr option + + (** [back_pointers cell] returns the back pointers of [cell]. *) + val back_pointers : ('content, 'ptr) cell -> 'ptr list + + (** [genesis content] is the first cell of a skip list. It has + no back pointers. *) + val genesis : 'content -> ('content, 'ptr) cell + + (** [next ~prev_cell ~prev_cell_ptr content] creates a new cell + that carries some [content], that follows [prev_cell]. *) + val next : + prev_cell:('content, 'ptr) cell -> + prev_cell_ptr:'ptr -> + 'content -> + ('content, 'ptr) cell + + type ('ptr, 'content) search_cell_result = + | Found of ('ptr, 'content) cell + | Nearest of { + lower : ('ptr, 'content) cell; + upper : ('ptr, 'content) cell option; + } + | No_exact_or_lower_ptr + | Deref_returned_none + + type ('ptr, 'content) search_result = { + rev_path : ('ptr, 'content) cell list; + last_cell : ('ptr, 'content) search_cell_result; + } + + val pp_search_result : + pp_cell:(Format.formatter -> ('ptr, 'content) cell -> unit) -> + Format.formatter -> + ('ptr, 'content) search_result -> + unit + + module type MONADIC = sig + (** Type of results for monadic functions. *) + type 'a result + + (** [find ~deref ~cell_ptr ~target_index] returns [Some cell] where [cell] is + the cell at position [target_index]. This is done by dereferencing the last + pointer of the path returned by {!back_path}. *) + val find : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + ('content, 'ptr) cell option result + + (** [back_path ~deref ~cell_ptr ~target_index] returns [Some path] + where [path] is a sequence of back pointers to traverse to go + from [cell_ptr] to the cell at position [target_index] in the + sequence denoted by [(deref, cell_ptr)]. *) + val back_path : + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_index:Z.t -> + 'ptr list option result + + (** [valid_back_path ~equal_ptr ~deref ~cell_ptr ~target_ptr path] + returns [true] iff [path] is a valid and minimal path from + [cell_ptr] to [target_ptr] in the skip list denoted by + [(deref, cell_ptr)]. *) + val valid_back_path : + equal_ptr:('ptr -> 'ptr -> bool) -> + deref:('ptr -> ('content, 'ptr) cell option result) -> + cell_ptr:'ptr -> + target_ptr:'ptr -> + 'ptr list -> + bool result + + (** [search ~deref ~compare ~cell] allows to find a cell of the skip + list according to its content. This function assumes that the + content of the cells is in increasing order according to the + ordering defined by the function [compare]. In other words, this + function assumes that [compare] is a function that returns a + negative integer for cells before the target and a positive + integer for cells after the target. The value returned by this + function is [{rev_path; last_cell}] such that. + + - [rev_path = []] if and only if [compare (content cell) > 0] + + - For all the cases below, if there is a path from cell [A] to + cell [B], [rev_path] contains the list of cells to go from [B] to + [A]. Consequently, the first element of [rev_path] is [B]. + Except for [Nearest_lower], this path is a minimal path. + + - [last_pointer = Deref_returned_none] if [deref] fails to + associate a cell to a pointer during the search. In that case, + [rev_path] is a path from [cell] to [candidate] where [candidate] + is the last cell for which candidate did not fail and such that + [compare (content (candidate)) > 0]. + + - [last_pointer = No_exact_or_lower_ptr] if for all cell of the + skip list, [compare (content cell) > 0]. In that case, [rev_path] + is a path from [cell] to the genesis cell. + + - [last_pointer = Found target] if there is a cell [target] such + that [compare (content target) = 0] and a path from [cell] to + [target]. In that case, [rev_path] is the minimal path from + [cell] to [target]. + + - [last_pointer = Nearest_lower {lower;upper}] if there is no + cell in the skip list such that [compare (content cell) = 0]. In + that case [lower] is the unique cell such that [compare (content + lower) < 0] and for all other cells [candidate] such that + [compare (content candidate) < 0] then there is a path from + [lower] to [candidate]. [upper], if it exists is the successor + cell to [lower], i.e. [deref ((back_pointer upper) 0) = Some + lower]. In that case, [rev_path] is a path from [cell] to + [lower]. This path is *NOT* minimal but almost. The path is + minimal from [cell] to [upper=Some up]. By minimality, the path + is logarithmic. Consequently, since there is a direct pointer + from [up] to [lower], the passe to [lower] is also + logarithmic. *) + val search : + deref:('ptr -> ('content, 'ptr) cell option result) -> + compare:('content -> int) -> + cell:('content, 'ptr) cell -> + ('content, 'ptr) search_result result + end + + (** Functions in the empty monad are accessible directly. *) + include MONADIC with type 'a result := 'a + + (** This module contains functions in the {!Lwt} monad. *) + module Lwt : MONADIC with type 'a result := 'a Lwt.t + + (** This functor can be used to build monadic functions for the skip list. *) + module Make_monadic (M : MONAD) : MONADIC with type 'a result := 'a M.t +end + +module Make (_ : sig + val basis : int +end) : S diff --git a/src/lib_base/test/dune b/src/lib_base/test/dune index a65372282770563e746b301b1b5a967e5de0ba2a..148eddb98d467e4950879f882f7205e62d477f06 100644 --- a/src/lib_base/test/dune +++ b/src/lib_base/test/dune @@ -24,7 +24,13 @@ -open Tezos_error_monad -open Tezos_test_helpers -open Octez_alcotezt) - (modules test_bounded test_time test_protocol test_p2p_addr test_sized)) + (modules + test_bounded + test_time + test_protocol + test_p2p_addr + test_sized + test_skip_list)) (executable (name main) diff --git a/src/lib_base/test/test_skip_list.ml b/src/lib_base/test/test_skip_list.ml new file mode 100644 index 0000000000000000000000000000000000000000..b3532187e309868ab80a766cc701443f530d02f7 --- /dev/null +++ b/src/lib_base/test/test_skip_list.ml @@ -0,0 +1,519 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Nomadic Labs, *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. *) +(* *) +(*****************************************************************************) + +(** Testing + ------- + Component: Base + Invocation: dune exec src/lib_base/test/main.exe \ + -- --file test_skip_list.ml + Subject: Test skip list implementation +*) + +open TzPervasives + +exception Skip_list_test_error of string + +let tztest_qcheck2 ?print ?count ~name generator f = + let name, speed, run = + QCheck_alcotest.to_alcotest + ( QCheck2.Test.make ?print ?count ~name generator @@ fun x -> + f x ; + true ) + in + Alcotest.test_case name speed run + +module TestNat (Parameters : sig + val basis : int +end) = +struct + open Parameters + include Skip_list.Make (Parameters) + + (* This represents cells of skip lists whose content are even + numbers from {!val:initial_value} and increase 2 by 2. *) + type t = {size : int; cells : (int * (int, int) cell) list} + + let deref list i = List.assoc ~equal:Int.equal i list.cells + + (* Must be an even number. See {!val:succ}. *) + let initial_value = 10 + + (* Since the list was initialised once/computed once, we can get + back its content from its index directly. *) + let content_from_index ~default list i = + match deref list i with None -> default | Some x -> content x + + let show_cell cell = + Printf.sprintf + "{ content = %d; back_pointers = %s }" + (content cell) + (back_pointers cell |> List.map string_of_int |> String.concat " ") + + let show_cells cells = + String.concat + "; " + (List.map + (fun (i, cell) -> Printf.sprintf "%d:%s" i (show_cell cell)) + cells) + + let show_list list = + Printf.sprintf + "basis: %d, size: %d, cells = %s" + basis + list.size + (show_cells list.cells) + + let show_path path = String.concat " " (List.map string_of_int path) + + let head list = + match List.hd list.cells with None -> assert false | Some h -> h + + let zero = {size = 1; cells = [(0, genesis initial_value)]} + + let succ list = + let prev_cell_ptr, prev_cell = head list in + (* Content of cells are only even numbers so that searching odd numbers will always fail. *) + let cell = + next ~prev_cell ~prev_cell_ptr ((2 * list.size) + initial_value) + in + {size = list.size + 1; cells = (list.size, cell) :: list.cells} + + let back_path list start stop = + back_path ~deref:(deref list) ~cell_ptr:start ~target_index:(Z.of_int stop) + + let find list start stop = + find ~deref:(deref list) ~cell_ptr:start ~target_index:(Z.of_int stop) + + let search list start target_content = + search + ~deref:(deref list) + ~compare:(fun x -> Int.(compare x target_content)) + ~cell:start + + let valid_back_path list start stop path = + valid_back_path + ~equal_ptr:( = ) + ~deref:(deref list) + ~cell_ptr:start + ~target_ptr:stop + path + + let rec nlist basis n = if n = 0 then zero else succ (nlist basis (n - 1)) + + let check_find i j = + let l = nlist basis i in + let () = + match find l i j with + | None -> Test.fail ~__LOC__ "There must be a cell (%d)" i + | Some cell -> + let index = Z.to_int (index cell) in + if index <> j then + Test.fail + ~__LOC__ + "Found cell is not the correct one (found %d, expected %d)" + index + j + in + let path = + match back_path l i j with + | None -> Test.fail ~__LOC__ "There must be path from %d to %d" i j + | Some path -> path + in + let () = + match List.(hd (rev path)) with + | None -> + Test.fail ~__LOC__ " There can't be an empty path from %d to %d" i j + | Some stop_cell -> + if j <> stop_cell then + Test.fail + ~__LOC__ + "Found cell is not equal to stop cell of back path (%d to %d)" + i + j + in + () + + let check_invalid_find i = + let l = nlist basis i in + let check_nothing_found i j = + match find l i j with + | None -> () + | Some _v -> + Test.fail ~__LOC__ "There should be no value found at %d from %d" i j + in + check_nothing_found i (-1) ; + let rec aux j = + if i > j then ( + check_nothing_found j i ; + aux (j + 1)) + in + aux 0 + + let check_path i j back_path_fn = + let l = nlist basis i in + let path = back_path_fn l i j in + match path with + | None -> Test.fail ~__LOC__ "There must be path from %d to %d" i j + | Some path -> + let len = List.length path in + let log_basis x = + int_of_float @@ ceil (log (float_of_int x) /. log (float_of_int basis)) + in + let log_ij = log_basis (i - j + 1) in + let expected = max 1 (log_ij * basis) in + if len > expected then + Test.fail + ~__LOC__ + "The proof is too long! Expected = %d < len = %d [basis = %d, i = \ + %d, log = %d, j = %d]\n" + expected + len + basis + i + log_ij + j ; + if not (valid_back_path l i j path) then + Test.fail + ~__LOC__ + "The path %s does not connect %d to %d (or is invalid/non-minimal)" + (show_path path) + i + j + + let check_invalid_paths i = + let l = nlist basis i in + let rec aux j = + if i <= j then () + else + let () = + match back_path l j i with + | None -> () + | Some _path -> + Test.fail + ~__LOC__ + "There should be no path connecting %d to %d" + j + i + in + aux (j + 1) + in + aux 0 + + let check_lower_path history rev_path target = + match rev_path with + | [] -> + (* checked before. *) + assert false + | [cell_x] -> + if + (* If there is a single element, we check the content of the + cell is smaller than the target. *) + Compare.Int.(content cell_x >= target) + then Test.fail ~__LOC__ "Invalid path: %d" target + | rev_path -> ( + (* The path is returned from the start cell to the target. The + invariant we want to check is in the opposite direction. *) + match rev_path with + | cell_x :: cell_z :: _ -> ( + let i = Z.to_int (index cell_x) in + let next_index = i + 1 in + match + List.nth history.cells (List.length history.cells - next_index - 1) + with + | None -> assert false + | Some (_y, cell_y) -> + if + Compare.Int.( + content cell_x < target + && target < content cell_y + && content cell_y <= content cell_z) + then () + else Test.fail ~__LOC__ "Invariant for 'Lower' is broken") + | _ -> assert false) + + let check_invalid_search_paths i = + let l = nlist basis i in + let rec aux j = + if i <= j then () + else + (* An odd number to make the search fails. *) + let shift_size = 5 in + (* delta is chosen so that j + delta is not in the list and + can be below the smallest element and greater than the + largest element. *) + let delta = + if List.length l.cells mod 2 = 0 then -shift_size else shift_size + in + let t = content_from_index ~default:(-1) l j + delta in + (* By construction, deref never fails since j <= List.length list. *) + match deref l i with + | None -> assert false + | Some start_content -> + (* For each case below, we check whether the last cell + returned is valid with respect to the current path. Two + cases are not possible. *) + (match search l start_content t with + | {last_cell = No_exact_or_lower_ptr; rev_path} -> ( + (* In that case, we check the path returned by search + is above the target. *) + match rev_path with + | [] -> Test.fail ~__LOC__ "unexpected empty path" + | head :: _ -> + if Compare.Int.(content head > t) then () + else + Test.fail + ~__LOC__ + "Invariant for 'No_exact_or_lower_ptr' broken") + | {last_cell = Nearest _; rev_path} -> + (* In that case, we check the property of being a lower path. *) + check_lower_path l rev_path t + | {last_cell = Deref_returned_none; _} -> + (* deref should always work *) + assert false + | {last_cell = Found _; _} -> + (* Because we search for a cell that which is not in + the list, if the cell was found, we fail. *) + Test.fail + ~__LOC__ + "There should be no search path connecting %d to a node with \ + content %d" + i + t) ; + aux (j + 1) + in + aux 0 + + let pp_search_result fmt = + pp_search_result + ~pp_cell:(fun fmt cell -> Format.fprintf fmt "%s" (show_cell cell)) + fmt +end + +let test_skip_list_nat_check_path (basis, i, j) = + let module M = TestNat (struct + let basis = basis + end) in + let back_path list start stop = M.back_path list start stop in + M.check_path i j back_path + +let test_skip_list_nat_check_find (basis, i, j) = + let module M = TestNat (struct + let basis = basis + end) in + M.check_find i j + +let test_skip_list_nat_check_invalid_find (basis, i) = + let module M = TestNat (struct + let basis = basis + end) in + M.check_invalid_find i + +let test_skip_list_nat_check_invalid_path (basis, i) = + let module M = TestNat (struct + let basis = basis + end) in + M.check_invalid_paths i + +let test_minimal_back_path () = + let basis = 4 in + let module M = TestNat (struct + let basis = basis + end) in + let l = M.nlist basis 20 in + let check_minimal_path = function + | None, _ -> Test.fail ~__LOC__ "empty path" + | Some path, expected_path -> + if path = expected_path then () + else + Test.fail + ~__LOC__ + "non-minimal path:[%s] != expected_path:[%s]" + (M.show_path path) + (M.show_path expected_path) + in + let cases = + [ + (6, 1, [6; 3; 2; 1]); + (6, 3, [6; 3]); + (10, 3, [10; 7; 3]); + (10, 5, [10; 7; 6; 5]); + (10, 7, [10; 7]); + (10, 9, [10; 9]); + ] + in + List.iter + check_minimal_path + (List.map + (fun (start, target, expected_path) -> + (M.back_path l start target, expected_path)) + cases) + +let test_search_non_minimal_back_path () = + let basis = 4 in + let module M = TestNat (struct + let basis = basis + end) in + let l = M.nlist basis 100 in + let index_of_content candidate = + match List.find (fun (_, cell) -> cell = candidate) l.cells with + | None -> assert false + | Some (x, _) -> x + in + let deref x = match M.deref l x with None -> assert false | Some x -> x in + (* This target is chosen to demonstrate that the path is not always + minimal, but this happens only on the very last node. [target] + must be odd to ensure the content is not in the list. *) + let target = 17 in + let start_index = 100 in + let start = deref start_index in + (* Since we are only checking the minimality of the path returned by + search, we assume the other part of the [search] specification to + be correct below (hence the [assert false]). *) + match M.search l start target with + | M.{last_cell = Nearest {lower; upper = Some upper}; rev_path} -> ( + match rev_path with + | [] -> + (* By specification of the function [search]. *) + assert false + | _lower :: upper_path as lower_path -> ( + (* We check the upper path is minimal. *) + let upper_index = index_of_content upper in + match M.back_path l start_index upper_index with + | None -> + (* By specification of the function [search]. *) + assert false + | Some upper_expected_path -> + if List.rev upper_path = List.map deref upper_expected_path then + (* We check the lower path is not minimal. *) + let lower_index = index_of_content lower in + match M.back_path l start_index lower_index with + | None -> + (* By specification of the function [search]. *) + assert false + | Some lower_expected_path -> + if List.rev lower_path = List.map deref lower_expected_path + then + Test.fail + ~__LOC__ + "The path returned is minimal while it should not be \ + the case." + else () + else (* By specification of the function [search]. *) + assert false)) + | _ -> + (* The cell does not exist in the list. *) + assert false + +let test_skip_list_nat_check_path_with_search (basis, i, j) = + let module M = TestNat (struct + let basis = basis + end) in + M.check_path i j (fun l i j -> + let target = M.content_from_index ~default:(-1) l j in + let start = + match M.deref l i with None -> assert false | Some start -> start + in + match M.search l start target with + | {last_cell = Found _; rev_path} -> + List.rev_map + (fun cell -> + let x = M.content cell in + (x - 10) / 2) + rev_path + |> Option.some + | _result -> None) + +let test_skip_list_nat_check_invalid_path_with_search (basis, i) = + let module M = TestNat (struct + let basis = basis + end) in + M.check_invalid_search_paths i + +let tests = + [ + tztest_qcheck2 + ~name:"Skip list: produce paths with `back_path` and check" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 100 in + let* j = 0 -- i in + return (basis, i, j)) + test_skip_list_nat_check_path; + tztest_qcheck2 + ~name:"Skip list: find cell with `find` and `check`" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 100 in + let* j = 0 -- i in + return (basis, i, j)) + test_skip_list_nat_check_find; + tztest_qcheck2 + ~name:"Skip list: `find` won't produce invalid value" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 100 in + return (basis, i)) + test_skip_list_nat_check_invalid_find; + tztest_qcheck2 + ~name:"Skip list: `back_path` won't produce invalid paths" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 100 in + return (basis, i)) + test_skip_list_nat_check_invalid_path; + Alcotest.test_case + "Skip list: check if the back_path is minimal" + `Quick + test_minimal_back_path; + tztest_qcheck2 + ~name:"Skip list: produce paths with `search` and check" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 100 in + let* j = 0 -- i in + return (basis, i, j)) + test_skip_list_nat_check_path_with_search; + tztest_qcheck2 + ~name:"Skip list: `search` won't produce invalid paths" + ~count:10 + QCheck2.Gen.( + let* basis = frequency [(5, pure 4); (1, 2 -- 73)] in + let* i = 0 -- 10 in + return (basis, i)) + test_skip_list_nat_check_invalid_path_with_search; + (* We cheat here to avoid mixing non-pbt tests with pbt tests. *) + tztest_qcheck2 + ~name:"Skip list: `search` may not produce minimal path" + ~count:10 + QCheck2.Gen.unit + test_search_non_minimal_back_path; + ] + +let () = Alcotest.run ~__FILE__ "Skip_list" [("skip list", tests)] diff --git a/src/lib_base/tzPervasives.ml b/src/lib_base/tzPervasives.ml index 0f8194b6bcac9e07d919263514584b2caeedc260..1fc558fd3e3d5ba6cae0c6747f4c431615e20a14 100644 --- a/src/lib_base/tzPervasives.ml +++ b/src/lib_base/tzPervasives.ml @@ -117,3 +117,5 @@ module Empty = struct let absurd : t -> 'a = function _ -> . end + +module Skip_list = Skip_list diff --git a/src/lib_base/tzPervasives.mli b/src/lib_base/tzPervasives.mli index 16e8f507989d2d426e65046e54424b9ce8ba7ea4..cc91a4825cf08401f67f9591b5979fd789e24f58 100644 --- a/src/lib_base/tzPervasives.mli +++ b/src/lib_base/tzPervasives.mli @@ -106,6 +106,7 @@ module Operation_metadata_list_list_hash = Tezos_crypto.Hashed.Operation_metadata_list_list_hash module Protocol_hash = Tezos_crypto.Hashed.Protocol_hash module Signature = Tezos_crypto.Signature +module Skip_list = Skip_list include module type of Utils.Infix