diff --git a/src/lib_context/context.ml b/src/lib_context/context.ml index ce40ee4725eb32f699d87c540a99b6d1e3203212..3819fb75a9b9c56e86cbf2cfcd7ee1e156d7c4f0 100644 --- a/src/lib_context/context.ml +++ b/src/lib_context/context.ml @@ -196,6 +196,8 @@ and context = { type t = context +let index {index; _} = index + module type S = Tezos_context_sigs.Context.S (*-- Version Access and Update -----------------------------------------------*) @@ -299,7 +301,7 @@ type value = bytes type tree = Store.tree module Tree = Tezos_context_helpers.Context.Make_tree (Store) -module Proof = Tree.Proof +include Tezos_context_helpers.Context.Make_proof (Store) let mem ctxt key = Tree.mem ctxt.tree (data_key key) @@ -456,6 +458,10 @@ let merkle_tree t leaf_kind key = in key_to_merkle_tree subtree key +let produce_tree_proof index = produce_tree_proof index.repo + +let produce_stream_proof index = produce_stream_proof index.repo + (*-- Predefined Fields -------------------------------------------------------*) let get_protocol v = diff --git a/src/lib_context/context.mli b/src/lib_context/context.mli index 4e8bc89a8c20f6b94773b6adf0fd4dee32855012..b5173e79b17d5ee4d60aa0ef439d213f234d63e6 100644 --- a/src/lib_context/context.mli +++ b/src/lib_context/context.mli @@ -41,12 +41,14 @@ module type S = sig include Tezos_context_sigs.Context.S end -include S +(** A block-indexed (key x value) store directory. *) +type index + +include S with type index := index type context = t -(** A block-indexed (key x value) store directory. *) -type index +val index : context -> index (** Open or initialize a versioned store at a given path. *) val init : diff --git a/src/lib_context/helpers/context.ml b/src/lib_context/helpers/context.ml index ccbca851ceba256a55e2ee4e51233488e3d3c963..31278b4a0aef66923b02a9a673d5d8886f249c5f 100644 --- a/src/lib_context/helpers/context.ml +++ b/src/lib_context/helpers/context.ml @@ -36,141 +36,19 @@ module type DB = and type metadata = Metadata.t and type Key.step = Path.step +module Kinded_hash = struct + let of_context_hash = function + | `Value h -> `Contents (Hash.of_context_hash h, ()) + | `Node h -> `Node (Hash.of_context_hash h) + + let to_context_hash = function + | `Contents (h, ()) -> `Value (Hash.to_context_hash h) + | `Node h -> `Node (Hash.to_context_hash h) +end + module Make_tree (Store : DB) = struct include Store.Tree - module Kinded_hash = struct - let of_context_hash = function - | `Contents h -> `Contents (Hash.of_context_hash h, ()) - | `Node h -> `Node (Hash.of_context_hash h) - - let to_context_hash = function - | `Contents (h, ()) -> `Contents (Hash.to_context_hash h) - | `Node h -> `Node (Hash.to_context_hash h) - end - - module DB_proof = Store.Tree.Proof - - module Proof = struct - include Tezos_context_sigs.Context.Proof_types - - let v ~before ~after state = {before; after; state} - - let before t = t.before - - let after t = t.after - - let state t = t.state - - module State = struct - let rec to_inode : type a b. (a -> b) -> a DB_proof.inode -> b inode = - fun f {length; proofs} -> - {length; proofs = List.map (fun (k, v) -> (k, f v)) proofs} - - and to_tree : DB_proof.tree -> tree = function - | Contents (c, ()) -> Contents c - | Blinded_contents (h, ()) -> Blinded_contents (Hash.to_context_hash h) - | Node l -> Node (List.map (fun (k, v) -> (k, to_tree v)) l) - | Blinded_node h -> Blinded_node (Hash.to_context_hash h) - | Inode i -> Inode (to_inode to_inode_tree i) - | Extender e -> Extender (to_inode_extender to_inode_tree e) - - and to_inode_extender : - type a b. (a -> b) -> a DB_proof.inode_extender -> b inode_extender = - fun f {length; segments; proof} -> {length; segments; proof = f proof} - - and to_inode_tree : DB_proof.inode_tree -> tree inode_tree = function - | Blinded_inode h -> Blinded_inode (Hash.to_context_hash h) - | Inode_values l -> - Inode_values (List.map (fun (k, v) -> (k, to_tree v)) l) - | Inode_tree i -> Inode_tree (to_inode to_inode_tree i) - | Inode_extender e -> Inode_extender (to_inode_extender to_inode_tree e) - - let rec of_inode : type a b. (a -> b) -> a inode -> b DB_proof.inode = - fun f {length; proofs} -> - {length; proofs = List.map (fun (k, v) -> (k, f v)) proofs} - - and of_tree : tree -> DB_proof.tree = function - | Contents c -> Contents (c, ()) - | Blinded_contents h -> Blinded_contents (Hash.of_context_hash h, ()) - | Node l -> Node (List.map (fun (k, v) -> (k, of_tree v)) l) - | Blinded_node h -> Blinded_node (Hash.of_context_hash h) - | Inode i -> Inode (of_inode of_inode_tree i) - | Extender e -> Extender (of_inode_extender of_inode_tree e) - - and of_inode_extender : - type a b. (a -> b) -> a inode_extender -> b DB_proof.inode_extender = - fun f {length; segments; proof} -> {length; segments; proof = f proof} - - and of_inode_tree : tree inode_tree -> DB_proof.inode_tree = function - | Blinded_inode h -> Blinded_inode (Hash.of_context_hash h) - | Inode_values l -> - Inode_values (List.map (fun (k, v) -> (k, of_tree v)) l) - | Inode_tree i -> Inode_tree (of_inode of_inode_tree i) - | Inode_extender e -> Inode_extender (of_inode_extender of_inode_tree e) - - let of_stream_elt : elt -> DB_proof.elt = function - | Contents c -> Contents c - | Node l -> - Node (List.map (fun (k, v) -> (k, Kinded_hash.of_context_hash v)) l) - | Inode i -> Inode (of_inode Hash.of_context_hash i) - | Inode_extender e -> - Inode_extender (of_inode_extender Hash.of_context_hash e) - - let of_stream : stream -> DB_proof.stream = Seq.map of_stream_elt - - let to_stream_elt : DB_proof.elt -> elt = function - | Contents c -> Contents c - | Node l -> - Node (List.map (fun (k, v) -> (k, Kinded_hash.to_context_hash v)) l) - | Inode i -> Inode (to_inode Hash.to_context_hash i) - | Inode_extender e -> - Inode_extender (to_inode_extender Hash.to_context_hash e) - - let to_stream : DB_proof.stream -> stream = Seq.map to_stream_elt - end - - let of_proof f p = - let before = Kinded_hash.to_context_hash (Proof.before p) in - let after = Kinded_hash.to_context_hash (Proof.after p) in - let state = f (Proof.state p) in - v ~before ~after state - - let to_proof f p = - let before = Kinded_hash.of_context_hash p.before in - let after = Kinded_hash.of_context_hash p.after in - let state = f p.state in - DB_proof.v ~before ~after state - - let to_tree = of_proof State.to_tree - - let of_tree = to_proof State.of_tree - - let to_stream = of_proof State.to_stream - - let of_stream = to_proof State.of_stream - end - - let produce_proof repo hash f = - let open Lwt_syntax in - let hash = Kinded_hash.of_context_hash hash in - let+ (p, r) = produce_proof repo hash f in - (Proof.to_tree p, r) - - let verify_proof proof f = - let proof = Proof.of_tree proof in - verify_proof proof f - - let produce_stream repo hash f = - let open Lwt_syntax in - let hash = Kinded_hash.of_context_hash hash in - let+ (p, r) = produce_stream repo hash f in - (Proof.to_stream p, r) - - let verify_stream proof f = - let proof = Proof.of_stream proof in - verify_stream proof f - let pp = Irmin.Type.pp Store.tree_t let empty _ = Store.Tree.empty () @@ -301,7 +179,7 @@ module Make_tree (Store : DB) = struct repo (match kinded_hash with | `Node hash -> `Node (Hash.of_context_hash hash) - | `Contents hash -> `Contents (Hash.of_context_hash hash, ())) + | `Value hash -> `Contents (Hash.of_context_hash hash, ())) let list tree ?offset ?length key = Store.Tree.list ~cache:true tree ?offset ?length key @@ -343,6 +221,124 @@ module Make_tree (Store : DB) = struct | exn -> raise exn) end +module Make_proof (Store : DB) = struct + module DB_proof = Store.Tree.Proof + + module Proof = struct + include Tezos_context_sigs.Context.Proof_types + + module State = struct + let rec to_inode : type a b. (a -> b) -> a DB_proof.inode -> b inode = + fun f {length; proofs} -> + {length; proofs = List.map (fun (k, v) -> (k, f v)) proofs} + + and to_tree : DB_proof.tree -> tree = function + | Contents (c, ()) -> Value c + | Blinded_contents (h, ()) -> Blinded_value (Hash.to_context_hash h) + | Node l -> Node (List.map (fun (k, v) -> (k, to_tree v)) l) + | Blinded_node h -> Blinded_node (Hash.to_context_hash h) + | Inode i -> Inode (to_inode to_inode_tree i) + | Extender e -> Extender (to_inode_extender to_inode_tree e) + + and to_inode_extender : + type a b. (a -> b) -> a DB_proof.inode_extender -> b inode_extender = + fun f {length; segments = segment; proof} -> + {length; segment; proof = f proof} + + and to_inode_tree : DB_proof.inode_tree -> inode_tree = function + | Blinded_inode h -> Blinded_inode (Hash.to_context_hash h) + | Inode_values l -> + Inode_values (List.map (fun (k, v) -> (k, to_tree v)) l) + | Inode_tree i -> Inode_tree (to_inode to_inode_tree i) + | Inode_extender e -> Inode_extender (to_inode_extender to_inode_tree e) + + let rec of_inode : type a b. (a -> b) -> a inode -> b DB_proof.inode = + fun f {length; proofs} -> + {length; proofs = List.map (fun (k, v) -> (k, f v)) proofs} + + and of_tree : tree -> DB_proof.tree = function + | Value c -> Contents (c, ()) + | Blinded_value h -> Blinded_contents (Hash.of_context_hash h, ()) + | Node l -> Node (List.map (fun (k, v) -> (k, of_tree v)) l) + | Blinded_node h -> Blinded_node (Hash.of_context_hash h) + | Inode i -> Inode (of_inode of_inode_tree i) + | Extender e -> Extender (of_inode_extender of_inode_tree e) + + and of_inode_extender : + type a b. (a -> b) -> a inode_extender -> b DB_proof.inode_extender = + fun f {length; segment = segments; proof} -> + {length; segments; proof = f proof} + + and of_inode_tree : inode_tree -> DB_proof.inode_tree = function + | Blinded_inode h -> Blinded_inode (Hash.of_context_hash h) + | Inode_values l -> + Inode_values (List.map (fun (k, v) -> (k, of_tree v)) l) + | Inode_tree i -> Inode_tree (of_inode of_inode_tree i) + | Inode_extender e -> Inode_extender (of_inode_extender of_inode_tree e) + + let of_stream_elt : Stream.elt -> DB_proof.elt = function + | Value c -> Contents c + | Node l -> + Node (List.map (fun (k, v) -> (k, Kinded_hash.of_context_hash v)) l) + | Inode i -> Inode (of_inode Hash.of_context_hash i) + | Inode_extender e -> + Inode_extender (of_inode_extender Hash.of_context_hash e) + + let of_stream : stream -> DB_proof.stream = Seq.map of_stream_elt + + let to_stream_elt : DB_proof.elt -> Stream.elt = function + | Contents c -> Value c + | Node l -> + Node (List.map (fun (k, v) -> (k, Kinded_hash.to_context_hash v)) l) + | Inode i -> Inode (to_inode Hash.to_context_hash i) + | Inode_extender e -> + Inode_extender (to_inode_extender Hash.to_context_hash e) + + let to_stream : DB_proof.stream -> stream = Seq.map to_stream_elt + end + + let of_proof f p = + let before = Kinded_hash.to_context_hash (DB_proof.before p) in + let after = Kinded_hash.to_context_hash (DB_proof.after p) in + let state = f (DB_proof.state p) in + {version = 0; before; after; state} + + let to_proof f p = + let before = Kinded_hash.of_context_hash p.before in + let after = Kinded_hash.of_context_hash p.after in + let state = f p.state in + DB_proof.v ~before ~after state + + let to_tree = of_proof State.to_tree + + let of_tree = to_proof State.of_tree + + let to_stream = of_proof State.to_stream + + let of_stream = to_proof State.of_stream + end + + let produce_tree_proof repo hash f = + let open Lwt_syntax in + let hash = Kinded_hash.of_context_hash hash in + let+ (p, r) = Store.Tree.produce_proof repo hash f in + (Proof.to_tree p, r) + + let verify_tree_proof proof f = + let proof = Proof.of_tree proof in + Store.Tree.verify_proof proof f + + let produce_stream_proof repo hash f = + let open Lwt_syntax in + let hash = Kinded_hash.of_context_hash hash in + let+ (p, r) = Store.Tree.produce_stream repo hash f in + (Proof.to_stream p, r) + + let verify_stream_proof proof f = + let proof = Proof.of_stream proof in + Store.Tree.verify_stream proof f +end + type error += Unsupported_context_hash_version of Context_hash.Version.t let () = diff --git a/src/lib_context/helpers/context.mli b/src/lib_context/helpers/context.mli index 381af4145bd4d9164e817d6e0474b69a5f51e7bd..ab715b766bddef8dcb81c3433c3cb82c45d082ea 100644 --- a/src/lib_context/helpers/context.mli +++ b/src/lib_context/helpers/context.mli @@ -44,8 +44,6 @@ module Make_tree (DB : DB) : sig and type value := DB.contents and type tree := DB.tree - module Proof : Tezos_context_sigs.Context.PROOF - val pp : Format.formatter -> DB.tree -> unit val empty : _ -> DB.tree @@ -60,7 +58,7 @@ module Make_tree (DB : DB) : sig val of_raw : raw -> DB.tree - type kinded_hash := [`Contents of Context_hash.t | `Node of Context_hash.t] + type kinded_hash := [`Value of Context_hash.t | `Node of Context_hash.t] type repo = DB.repo @@ -68,12 +66,22 @@ module Make_tree (DB : DB) : sig val shallow : DB.repo -> kinded_hash -> DB.tree + (** Exception raised by [find_tree] and [add_tree] when applied to shallow + trees. It is exposed for so that the memory context can in turn raise it. *) + exception Context_dangling_hash of string +end + +module Make_proof (DB : DB) : sig + module Proof : Tezos_context_sigs.Context.PROOF + + type kinded_hash := [`Value of Context_hash.t | `Node of Context_hash.t] + type tree_proof := Proof.tree Proof.t type stream_proof := Proof.stream Proof.t type ('proof, 'result) producer := - repo -> + DB.repo -> kinded_hash -> (DB.tree -> (DB.tree * 'result) Lwt.t) -> ('proof * 'result) Lwt.t @@ -83,17 +91,13 @@ module Make_tree (DB : DB) : sig (DB.tree -> (DB.tree * 'result) Lwt.t) -> (DB.tree * 'result, [`Msg of string]) result Lwt.t - val produce_proof : (tree_proof, 'a) producer - - val verify_proof : (tree_proof, 'a) verifier + val produce_tree_proof : (tree_proof, 'a) producer - val produce_stream : (stream_proof, 'a) producer + val verify_tree_proof : (tree_proof, 'a) verifier - val verify_stream : (stream_proof, 'a) verifier + val produce_stream_proof : (stream_proof, 'a) producer - (** Exception raised by [find_tree] and [add_tree] when applied to shallow - trees. It is exposed for so that the memory context can in turn raise it. *) - exception Context_dangling_hash of string + val verify_stream_proof : (stream_proof, 'a) verifier end type error += Unsupported_context_hash_version of Context_hash.Version.t diff --git a/src/lib_context/memory/context.ml b/src/lib_context/memory/context.ml index f2610d7806b685992e5a3d07c4c3c811a5e14af5..7fe42cfb4387e31bbc77e164bfc6a466bfe2887b 100644 --- a/src/lib_context/memory/context.ml +++ b/src/lib_context/memory/context.ml @@ -29,8 +29,11 @@ module Store = Irmin_pack_mem.Make (Node) (Commit) (Conf) (Metadata) (Contents) (Path) (Branch) (Hash) +module Tree = Tezos_context_helpers.Context.Make_tree (Store) +include Tree +include Tezos_context_helpers.Context.Make_proof (Store) -type index = Store.Repo.t +type index = Store.repo type context = {repo : index; parents : Store.Commit.t list; tree : Store.tree} @@ -42,9 +45,6 @@ type key = string list type value = bytes -module Tree = Tezos_context_helpers.Context.Make_tree (Store) -include Tree - let index {repo; _} = repo let exists index key = diff --git a/src/lib_context/memory/context.mli b/src/lib_context/memory/context.mli index 0f3d156459abacedd020a02a17f3983e0e28329b..6ac09d3febee12aa73e448748df53a1c218d796f 100644 --- a/src/lib_context/memory/context.mli +++ b/src/lib_context/memory/context.mli @@ -26,10 +26,10 @@ (** Implementation of Tezos context fully in memory. *) -include Tezos_context_sigs.Context.S - type index +include Tezos_context_sigs.Context.S with type index := index + val index : t -> index val exists : index -> Context_hash.t -> bool Lwt.t diff --git a/src/lib_context/sigs/context.ml b/src/lib_context/sigs/context.ml index ce2a8631eb8e2530fb18e5173335dad94ef0649f..21af243e7fa2860d77ce683f283732ed2537e88e 100644 --- a/src/lib_context/sigs/context.ml +++ b/src/lib_context/sigs/context.ml @@ -23,8 +23,7 @@ (* *) (*****************************************************************************) -(** The tree depth of a fold. See the [View.fold] function for more - information. *) +(** The tree depth of a fold. See the [fold] function for more information. *) type depth = [`Eq of int | `Le of int | `Lt of int | `Ge of int | `Gt of int] module type VIEW = sig @@ -63,7 +62,7 @@ module type VIEW = sig val list : t -> ?offset:int -> ?length:int -> key -> (string * tree) list Lwt.t - (** [length t key] is an Lwt promise that resolve to the number of + (** [length t key] is an Lwt promise that resolves to the number of files and sub-nodes stored under [k] in [t]. It is equivalent to [let+ l = list t k in List.length l] but has a @@ -101,11 +100,11 @@ module type VIEW = sig The depth is 0-indexed. If [depth] is set (by default it is not), then [f] is only called when the conditions described by the parameter is true: - - [Eq d] folds over nodes and contents of depth exactly [d]. - - [Lt d] folds over nodes and contents of depth strictly less than [d]. - - [Le d] folds over nodes and contents of depth less than or equal to [d]. - - [Gt d] folds over nodes and contents of depth strictly more than [d]. - - [Ge d] folds over nodes and contents of depth more than or equal to [d]. + - [Eq d] folds over nodes and values of depth exactly [d]. + - [Lt d] folds over nodes and values of depth strictly less than [d]. + - [Le d] folds over nodes and values of depth less than or equal to [d]. + - [Gt d] folds over nodes and values of depth strictly more than [d]. + - [Ge d] folds over nodes and values of depth more than or equal to [d]. If [order] is [`Sorted] (the default), the elements are traversed in lexicographic order of their keys. For large nodes, it is memory-consuming, @@ -187,139 +186,185 @@ module type HASH_VERSION = sig end module Proof_types = struct - (** The type for node segments. *) - type segment = string + (** Proofs are compact representations of trees which can be shared + between peers. - (** The type for contents. *) - type contents = bytes + This is expected to be used as follows: - (** The type for hashes. *) - type hash = Context_hash.t + - A first peer runs a function [f] over a tree [t]. While performing + this computation, it records: the hash of [t] (called [before] + below), the hash of [f t] (called [after] below) and a subset of [t] + which is needed to replay [f] without any access to the first peer's + storage. Once done, all these informations are packed into a proof of + type [t] that is sent to the second peer. - (** The type for (internal) inode proofs. + - The second peer generates an initial tree [t'] from [p] and computes + [f t']. Once done, it compares [t']'s hash and [f t']'s hash to [before] + and [after]. If they match, they know that the result state [f t'] is a + valid context state, without having to have access to the full storage + of the first peer. *) - These proofs encode large directories into a more efficient tree-like - structure. + (** The type for file and directory names. *) + type step = string - Invariant are dependent on the backend. + (** The type for values. *) + type value = bytes - [length] is the total number of entries in the chidren of the inode. - E.g. the size of the "flattened" version of that inode. This is used - to efficiently implements paginated lists. + (** The type of indices for inodes' children. *) + type index = int - [proofs] have a length of at most [Conf.entries] entries. This list can - be sparse so every proof is indexed by their position between - [0 ... (Conf.entries-1)]. For binary trees, this boolean - index is a segment of the left-right decision proof corresponding - to the path in that binary tree. *) - type 'a inode = {length : int; proofs : (int * 'a) list} - - (** The type for inode extenders. *) - type 'a inode_extender = {length : int; segments : int list; proof : 'a} - [@@deriving irmin] + (** The type for hashes. *) + type hash = Context_hash.t - (** The type for inode trees. + (** The type for (internal) inode proofs. - Inodes are optimized representations of trees. Pointers in that trees - would refer to blinded nodes, nodes or to other inodes. E.g. - Blinded content nor contents are not expected to appear directly in - an inode tree. *) - type 'tree inode_tree = - | Blinded_inode of hash - | Inode_values of (segment * 'tree) list - | Inode_tree of 'tree inode_tree inode - | Inode_extender of 'tree inode_tree inode_extender + These proofs encode large directories into a tree-like structure. This + reflects irmin-pack's way of representing nodes and computing + hashes (tree-like representations for nodes scales better than flat + representations). + + [length] is the total number of entries in the children of the inode. + It's the size of the "flattened" version of that inode. [length] can be + used to prove the correctness of operations such [Tree.length] and + [Tree.list ~offset ~length] in an efficient way. + + [proofs] contains the children proofs. It is a sparse list of ['a] values. + These values are associated to their index in the list, and the list is + kept sorted in increasing order of indices. ['a] can be a concrete proof + or a hash of that proof. + - In proofs with version 0, inodes have at most 32 proofs + (indexed from 0 to 31). *) + type 'a inode = {length : int; proofs : (index * 'a) list} + + (** The type for inode extenders. + + An extender is a compact representation of a sequence of [inode] which + contain only one child. As for inodes, The ['a] parameter can be a + concrete proof or a hash of that proof. + + If an inode proof contains singleton children [i_0, ..., i_n] such as: + [{length=l; proofs = [ (i_0, {proofs = ... { proofs = [ (i_n, p) ] }})]}], + then it is compressed into the inode extender + [{length=l; segment = [i_0;..;i_n]; proof=p}] sharing the same lenght [l] + and final proof [p]. *) + type 'a inode_extender = {length : int; segment : index list; proof : 'a} [@@deriving irmin] (** The type for compressed and partial Merkle tree proofs. - [Blinded_contents h] is a shallow pointer to contents having hash [h]. - [Contents c] is the contents [c]. + Tree proofs do not provide any guarantee with the ordering of + computations. For instance, if two effects commute, they won't be + distinguishable by this kind of proofs. + + [Value v] proves that a value [v] exists in the store. - Tree proofs do not provide any guarantee with the ordering of - computations. For instance, if two effects commute, they won't be - distinguishable by this kind of proofs. + [Blinded_value h] proves a value with hash [h] exists in the store. - [Blinded_node h] is a shallow pointer to a node having hash [h]. + [Node ls] proves that a a "flat" node containing the list of files [ls] + exists in the store. + - In proofs with version 0, the length of [ls] is at most 256; - [Node ls] is a "flat" node containing the list of files [ls]. The length - of [ls] is at most [Conf.stable_hash]. + [Blinded_node h] proves that a node with hash [h] exists in the store. - [Inode i] is an optimized representation of a node as a tree. + [Inode i] proves that an inode [i] exists in the store. - *) + [Extender e] proves that an inode extender [e] exist in the store. *) type tree = - | Contents of contents - | Blinded_contents of hash - | Node of (segment * tree) list + | Value of value + | Blinded_value of hash + | Node of (step * tree) list | Blinded_node of hash - | Inode of tree inode_tree inode - | Extender of tree inode_tree inode_extender - [@@deriving irmin] + | Inode of inode_tree inode + | Extender of inode_tree inode_extender + + (** The type for inode trees. It is a subset of [tree], limited to nodes. + + [Blinded_inode h] proves that an inode with hash [h] exists in the store. + + [Inode_values ls] is simliar to trees' [Node]. + + [Inode_tree i] is similar to tree's [Inode]. + + [Inode_extender e] is similar to trees' [Extender]. *) + and inode_tree = + | Blinded_inode of hash + | Inode_values of (step * tree) list + | Inode_tree of inode_tree inode + | Inode_extender of inode_tree inode_extender (** The type for kinded hashes. *) - type kinded_hash = [`Contents of Context_hash.t | `Node of Context_hash.t] - - (** The type for elements of stream proofs. *) - type elt = - | Contents of contents - | Node of (segment * kinded_hash) list - | Inode of hash inode - | Inode_extender of hash inode_extender - [@@deriving irmin] + type kinded_hash = [`Value of hash | `Node of hash] + + module Stream = struct + (** Stream proofs represent an explicit traversal of a Merle tree proof. + Every element (a node, a value, or a shallow pointer) met is first + "compressed" by shallowing its children and then recorded in the proof. + + As stream proofs directly encode the recursive construction of the + Merkle root hash is slightly simpler to implement: verifier simply + need to hash the compressed elements lazily, without any memory or + choice. - (** The type for stream proofs. Stream poofs provides stronger ordering - guarantees as the read effects have to happen in the exact same order and - they are easier to verify. *) - type stream = elt Seq.t [@@deriving irmin] + Moreover, the minimality of stream proofs is trivial to check. + Once the computation has consumed the compressed elements required, + it is sufficient to check that no more compressed elements remain + in the proof. - type 'a t = {before : kinded_hash; after : kinded_hash; state : 'a} + However, as the compressed elements contain all the hashes of their + shallow children, the size of stream proofs is larger + (at least double in size in practice) than tree proofs, which only + contains the hash for intermediate shallow pointers. *) + + (** The type for elements of stream proofs. + + [Value v] is a proof that the next element read in the store is the + value [v]. + + [Node n] is a proof that the next element read in the store is the + node [n]. + + [Inode i] is a proof that the next element read in the store is the + inode [i]. + + [Inode_extender e] is a proof that the next element read in the store + is the node extender [e]. *) + type elt = + | Value of value + | Node of (step * kinded_hash) list + | Inode of hash inode + | Inode_extender of hash inode_extender + + (** The type for stream proofs. + + The sequance [e_1 ... e_n] proves that the [e_1], ..., [e_n] are + read in the store in sequence. *) + type t = elt Seq.t + end + + type stream = Stream.t + + (** The type for proofs of kind ['a]. + + A proof [p] proves that the state advanced from [before p] to + [after p]. [state p]'s hash is [before p], and [state p] contains + the minimal information for the computation to reach [after p]. + + [version p] is the proof version, currently only version 0 is supported. + - Proofs with version 0 have top-level nodes of size 256. Whenever a node + has more than 256 entries, it is converted into an inode tree with + an branching factor of 32. *) + type 'a t = { + version : int; + before : kinded_hash; + after : kinded_hash; + state : 'a; + } end module type PROOF = sig - (** Proofs are compact representations of trees which can be shared - between a node and a client. - - The protocol is the following: - - - The node runs a function [f] over a tree [t]. While performing - this computation, the node records: the hash of [t] (called [before] - below), the hash of [f t] (called [after] below) and a subset of [t] - which is needed to replay [f] without any access to the node's storage. - Once done, the node packs this into a proof [p] and sends this to the - client. - - - The client generates an initial tree [t'] from [p] and computes [f t']. - Once done, it compares [t']'s hash and [f t']'s hash to [before] and - [after]. If they match, they know that the result state [f t'] is a - valid context state, without having to have access to the full node's - storage. *) - - include - module type of Proof_types - with type 'a inode = 'a Proof_types.inode - and type 'a inode_extender = 'a Proof_types.inode_extender - and type 'a inode_tree = 'a Proof_types.inode_tree - and type tree = Proof_types.tree - and type elt = Proof_types.elt - and type stream = Proof_types.stream - and type 'a t = 'a Proof_types.t - - (** [t] proves that the state advanced from [before t] to [after t]. - [state t]'s hash is [before], and [state t] contains the minimal - information for the computation to reach [after t]. *) - - (** [before t] it the state's hash at the beginning of the computation. *) - val before : 'a t -> kinded_hash - - (** [after t] is the state's hash at the end of the computation. *) - val after : 'a t -> kinded_hash - - (** [proof t] is a subset of the initial state needed to prove that the proven - computation could run without performing any I/O. *) - val state : 'a t -> 'a - - val v : before:kinded_hash -> after:kinded_hash -> 'a -> 'a t + include module type of struct + include Proof_types + end end module type S = sig @@ -327,6 +372,12 @@ module type S = sig module Proof : PROOF + (** The type for context repositories. *) + type index + + (** The type of tree for which to build a shallow tree with [shallow] *) + type kinded_hash := [`Value of Context_hash.t | `Node of Context_hash.t] + module Tree : sig include TREE @@ -353,69 +404,86 @@ module type S = sig (** [of_raw t] is the tree equivalent to the raw tree [t]. *) val of_raw : raw -> tree - (** The type of tree for which to build a shallow tree with [shallow] *) - type kinded_hash := [`Contents of Context_hash.t | `Node of Context_hash.t] - type repo val make_repo : unit -> repo Lwt.t + (** [shallow repo h] is the shallow tree having hash [h] based on + the repository [r]. *) val shallow : repo -> kinded_hash -> tree - - (** [produce r h f] runs [f] on top of a real store [r], producing a proof - and a reulst using the initial root hash [h]. - - The trees produced during [f]'s computation will carry the full history - of reads. This history will be reset when [f] is complete so subtrees - escaping the scope of [f] will not cause memory leaks. - - It is possible to call [produce_proof] recursively. In that case, each - input trees will have their own history of reads and will contain only - the reads needed to unshallow that corresponding trees. Proof trees - proof should then interact as if they were all unshallowed (note: in the - case of nested proofs, it's unclear what [verify_proof] should do...). *) - type ('proof, 'result) producer := - repo -> - kinded_hash -> - (tree -> (tree * 'result) Lwt.t) -> - ('proof * 'result) Lwt.t - - (** [verify t f] runs [f] in checking mode, loading data from the proof as - needed. - - The generated tree is the tree after [f] has completed. More operations - can be run on that tree, but it won't be able to access the underlying - storage. - - Raise [Proof.Bad_proof] when the proof is rejected. *) - type ('proof, 'result) verifier := - 'proof -> - (tree -> (tree * 'result) Lwt.t) -> - (tree * 'result, [`Msg of string]) result Lwt.t - - (** The type for tree proofs. - - Guarantee that the given computation performs exactly the same state - operations as the generating computation, *in some order*. *) - type tree_proof := Proof.tree Proof.t - - (** [produce_proof] is the producer of tree proofs. *) - val produce_proof : (tree_proof, 'a) producer - - (** [verify_proof] is the verifier of tree proofs. *) - val verify_proof : (tree_proof, 'a) verifier - - (** The type for stream proofs. - - Guarantee that the given computation performs exactly the same state - operations as the generating computation, in the exact same order.*in - some order*. *) - type stream_proof := Proof.stream Proof.t - - (** [produce_stream] is the producer of stream proofs. *) - val produce_stream : (stream_proof, 'a) producer - - (** [verify_stream] is the verifier of stream proofs. *) - val verify_stream : (stream_proof, 'a) verifier end + + (** [produce r h f] runs [f] on top of a real store [r], producing a proof and + a result using the initial root hash [h]. + + The trees produced during [f]'s computation will carry the full history of + reads. This history will be reset when [f] is complete so subtrees + escaping the scope of [f] will not cause memory leaks. + + Calling [produce_proof] recursively has an undefined behaviour. *) + type ('proof, 'result) producer := + index -> + kinded_hash -> + (tree -> (tree * 'result) Lwt.t) -> + ('proof * 'result) Lwt.t + + (** [verify p f] runs [f] in checking mode. [f] is a function that takes a + tree as input and returns a new version of the tree and a result. [p] is a + proof, that is a minimal representation of the tree that contains what [f] + should be expecting. + + Therefore, contrary to trees found in a storage, the contents of the trees + passed to [f] may not be available. For this reason, looking up a value at + some [path] can now produce three distinct outcomes: + - A value [v] is present in the proof [p] and returned : [find tree path] + is a promise returning [Some v]; + - [path] is known to have no value in [tree] : [find tree path] is a + promise returning [None]; and + - [path] is known to have a value in [tree] but [p] does not provide it + because [f] should not need it: [verify] returns an error classifying + [path] as an invalid path (see below). + + The same semantics apply to all operations on the tree [t] passed to [f] + and on all operations on the trees built from [f]. + + The generated tree is the tree after [f] has completed. That tree is + disconnected from any storage (i.e. [index]). It is possible to run + operations on it as long as they don't require loading shallowed subtrees. + + The result is [Error _] if the proof is rejected: + - For tree proofs: when [p.before] is different from the hash of + [p.state]; + - For tree and stream proofs: when [p.after] is different from the hash + of [f p.state]; + - For tree and stream proofs: when [f p.state] tries to access paths + invalid paths in [p.state]; + - For stream proofs: when the proof is not empty once [f] is done. *) + type ('proof, 'result) verifier := + 'proof -> + (tree -> (tree * 'result) Lwt.t) -> + (tree * 'result, [`Msg of string]) result Lwt.t + + (** The type for tree proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, *in some order*. *) + type tree_proof := Proof.tree Proof.t + + (** [produce_tree_proof] is the producer of tree proofs. *) + val produce_tree_proof : (tree_proof, 'a) producer + + (** [verify_tree_proof] is the verifier of tree proofs. *) + val verify_tree_proof : (tree_proof, 'a) verifier + + (** The type for stream proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, in the exact same order. *) + type stream_proof := Proof.stream Proof.t + + (** [produce_stream_proof] is the producer of stream proofs. *) + val produce_stream_proof : (stream_proof, 'a) producer + + (** [verify_stream] is the verifier of stream proofs. *) + val verify_stream_proof : (stream_proof, 'a) verifier end diff --git a/src/lib_protocol_environment/dummy_context.ml b/src/lib_protocol_environment/dummy_context.ml index d3dbd7b33b8ce27745b4183563b7d32d3d8f8fe6..9252db4464b93709abd333d3daa5cb482077d8df 100644 --- a/src/lib_protocol_environment/dummy_context.ml +++ b/src/lib_protocol_environment/dummy_context.ml @@ -73,6 +73,7 @@ module M = struct end include Tree + module Proof = Memory_context.M.Proof let set_protocol _ _ = assert false @@ -83,6 +84,10 @@ module M = struct let set_hash_version _ _ = assert false let get_hash_version _ = assert false + + let verify_tree_proof _ _ = assert false + + let verify_stream_proof _ _ = assert false end open Tezos_protocol_environment diff --git a/src/lib_protocol_environment/environment_context.ml b/src/lib_protocol_environment/environment_context.ml index 5095ee9e1da41929d114ed0856d4f23f572be675..05736aedb2fb3e503d3e550368d5754d34568a57 100644 --- a/src/lib_protocol_environment/environment_context.ml +++ b/src/lib_protocol_environment/environment_context.ml @@ -253,6 +253,51 @@ module Context = struct Ops.Tree.clear ?depth tree end + (* Proof *) + module Proof = Tezos_context_sigs.Context.Proof_types + + (* In-memory context for proof *) + module Proof_context = struct + module M = struct + include Tezos_context_memory.Context + + let set_protocol = add_protocol + + let fork_test_chain c ~protocol:_ ~expiration:_ = Lwt.return c + end + + let equality_witness : (M.t, M.tree) equality_witness = equality_witness () + + let ops = (module M : S with type t = 'ctxt and type tree = 'tree) + + let impl_name = "proof" + + let inject : M.tree -> tree = + fun tree -> Tree {ops; tree; equality_witness; impl_name} + + let project : tree -> M.tree = + fun (Tree t) -> + match equiv t.equality_witness equality_witness with + | (Some Refl, Some Refl) -> t.tree + | _ -> err_implementation_mismatch ~expected:impl_name ~got:t.impl_name + end + + let verify_tree_proof proof (f : tree -> (tree * 'a) Lwt.t) = + Proof_context.M.verify_tree_proof proof (fun tree -> + let tree = Proof_context.inject tree in + f tree >|= fun (tree, r) -> (Proof_context.project tree, r)) + >|= function + | Ok (tree, r) -> Ok (Proof_context.inject tree, r) + | Error e -> Error e + + let verify_stream_proof proof (f : tree -> (tree * 'a) Lwt.t) = + Proof_context.M.verify_stream_proof proof (fun tree -> + let tree = Proof_context.inject tree in + f tree >|= fun (tree, r) -> (Proof_context.project tree, r)) + >|= function + | Ok (tree, r) -> Ok (Proof_context.inject tree, r) + | Error e -> Error e + type cache_key = Environment_cache.key type block_cache = {context_hash : Context_hash.t; cache : cache} diff --git a/src/lib_protocol_environment/environment_context_intf.ml b/src/lib_protocol_environment/environment_context_intf.ml index 00ff77748c5236cf59b38c79f63fc6ad6e32ba93..86f413b314ba80f5ab5bf4061d92a189439c245f 100644 --- a/src/lib_protocol_environment/environment_context_intf.ml +++ b/src/lib_protocol_environment/environment_context_intf.ml @@ -29,6 +29,67 @@ module Kind = struct type t = [`Value | `Tree] end +module type VIEW = sig + (** @inline *) + include Tezos_context_sigs.Context.VIEW +end + +module type TREE = sig + (** @inline *) + include Tezos_context_sigs.Context.TREE +end + +module type PROOF = sig + (** @inline *) + include Tezos_context_sigs.Context.PROOF +end + +module type HASH_VERSION = sig + (** @inline *) + include Tezos_context_sigs.Context.HASH_VERSION +end + +(* Copy of sigs/v3/context.mli:CACHE *) +module type CACHE = sig + type t + + type size + + type index + + type identifier + + type key + + type value = .. + + val key_of_identifier : cache_index:index -> identifier -> key + + val identifier_of_key : key -> identifier + + val pp : Format.formatter -> t -> unit + + val find : t -> key -> value option Lwt.t + + val set_cache_layout : t -> size list -> t Lwt.t + + val update : t -> key -> (value * size) option -> t + + val sync : t -> cache_nonce:Bytes.t -> t Lwt.t + + val clear : t -> t + + val list_keys : t -> cache_index:index -> (key * size) list option + + val key_rank : t -> key -> int option + + val future_cache_expectation : t -> time_in_blocks:int -> t + + val cache_size : t -> cache_index:index -> size option + + val cache_size_limit : t -> cache_index:index -> size option +end + module type CORE = sig type t @@ -185,83 +246,54 @@ module V4 = struct include CORE with type t := t end - (* Copy of sigs/v3/context.mli:CACHE *) - module type CACHE = sig - type t - - type size - - type index - - type identifier - - type key - - type value = .. - - val key_of_identifier : cache_index:index -> identifier -> key - - val identifier_of_key : key -> identifier - - val pp : Format.formatter -> t -> unit - - val find : t -> key -> value option Lwt.t - - val set_cache_layout : t -> size list -> t Lwt.t - - val update : t -> key -> (value * size) option -> t + module type CACHE = CACHE +end - val sync : t -> cache_nonce:Bytes.t -> t Lwt.t +module V5 = struct + type depth = V4.depth - val clear : t -> t + module type VIEW = VIEW - val list_keys : t -> cache_index:index -> (key * size) list option + module Kind = Kind - val key_rank : t -> key -> int option + module type TREE = TREE - val future_cache_expectation : t -> time_in_blocks:int -> t + module type S = sig + include VIEW with type key = string list and type value = bytes - val cache_size : t -> cache_index:index -> size option + module Tree : sig + include + TREE + with type t := t + and type key := key + and type value := value + and type tree := tree - val cache_size_limit : t -> cache_index:index -> size option - end -end + val pp : Format.formatter -> tree -> unit + end -module V5 = V4 + include CORE with type t := t -module type VIEW = sig - (** @inline *) - include Tezos_context_sigs.Context.VIEW -end + module Proof : PROOF -module type TREE = sig - (** @inline *) - include Tezos_context_sigs.Context.TREE -end + type tree_proof := Proof.tree Proof.t -module type HASH_VERSION = sig - (** @inline *) - include Tezos_context_sigs.Context.HASH_VERSION -end + type stream_proof := Proof.stream Proof.t -module type S = sig - include VIEW with type key = string list and type value = bytes + type ('proof, 'result) verifier := + 'proof -> + (tree -> (tree * 'result) Lwt.t) -> + (tree * 'result, [`Msg of string]) result Lwt.t - module Tree : sig - include - TREE - with type t := t - and type key := key - and type value := value - and type tree := tree + val verify_tree_proof : (tree_proof, 'a) verifier - val pp : Format.formatter -> tree -> unit + val verify_stream_proof : (stream_proof, 'a) verifier end - include CORE with type t := t + module type CACHE = CACHE end -module type CACHE = V4.CACHE +module type S = V5.S module type Sigs = sig module V2 = V2 diff --git a/src/lib_protocol_environment/proxy_context.ml b/src/lib_protocol_environment/proxy_context.ml index 19721eab562799eda69b603175396bbf153ac53c..1e10b5311d5d24fdfcec2ffe0eb763384dd1ea02 100644 --- a/src/lib_protocol_environment/proxy_context.ml +++ b/src/lib_protocol_environment/proxy_context.ml @@ -262,6 +262,21 @@ module C = struct let clear ?depth t = Local.Tree.clear ?depth t.tree end + + module Proof = Local.Proof + + let of_local tree = {proxy = None; path = []; tree} + + let map_f f tree = f (of_local tree) >|= fun (t, r) -> (t.tree, r) + + let verify verifier proof f = + verifier proof (map_f f) >|= function + | Ok (t, r) -> Ok (of_local t, r) + | Error _ as e -> e + + let verify_tree_proof p f = verify Local.verify_tree_proof p f + + let verify_stream_proof p f = verify Local.verify_stream_proof p f end open Tezos_protocol_environment diff --git a/src/lib_protocol_environment/sigs/v5/context.mli b/src/lib_protocol_environment/sigs/v5/context.mli index 6114a50ed3948ac432a33e8f2eaf2efdd0191a2f..7146dfa856c00aa4bc0234b95816e1b4a5e46ca1 100644 --- a/src/lib_protocol_environment/sigs/v5/context.mli +++ b/src/lib_protocol_environment/sigs/v5/context.mli @@ -67,6 +67,13 @@ module type VIEW = sig val list : t -> ?offset:int -> ?length:int -> key -> (string * tree) list Lwt.t + (** [length t key] is an Lwt promise that resolves to the number of + files and sub-nodes stored under [k] in [t]. + + It is equivalent to [list t k >|= List.length] but has a + constant-time complexity. *) + val length : t -> key -> int Lwt.t + (** {2 Setters} *) (** [add t k v] is an Lwt promise that resolves to [c] such that: @@ -90,24 +97,22 @@ module type VIEW = sig (** {2 Folding} *) - (** [fold ?depth t root ~init ~f] recursively folds over the trees + (** [fold ?depth t root ~order ~init ~f] recursively folds over the trees and values of [t]. The [f] callbacks are called with a key relative to [root]. [f] is never called with an empty key for values; i.e., folding over a value is a no-op. - Elements are traversed in lexical order of keys. - The depth is 0-indexed. If [depth] is set (by default it is not), then [f] is only called when the conditions described by the parameter is true: - - [Eq d] folds over nodes and contents of depth exactly [d]. - - [Lt d] folds over nodes and contents of depth strictly less than [d]. - - [Le d] folds over nodes and contents of depth less than or equal to [d]. - - [Gt d] folds over nodes and contents of depth strictly more than [d]. - - [Ge d] folds over nodes and contents of depth more than or equal to [d]. + - [Eq d] folds over nodes and values of depth exactly [d]. + - [Lt d] folds over nodes and values of depth strictly less than [d]. + - [Le d] folds over nodes and values of depth less than or equal to [d]. + - [Gt d] folds over nodes and values of depth strictly more than [d]. + - [Ge d] folds over nodes and values of depth more than or equal to [d]. If [order] is [`Sorted] (the default), the elements are traversed in - lexicographic order of their keys. For large nodes, these two modes are memory-consuming, + lexicographic order of their keys. For large nodes, it is memory-consuming, use [`Undefined] for a more memory efficient [fold]. *) val fold : ?depth:depth -> @@ -125,7 +130,9 @@ end module type TREE = sig (** [Tree] provides immutable, in-memory partial mirror of the - context, with lazy reads and delayed writes. + context, with lazy reads and delayed writes. The trees are Merkle + trees that carry the same hash as the part of the context they + mirror. Trees are immutable and non-persistent (they disappear if the host crash), held in memory for efficiency, where reads are done @@ -174,6 +181,182 @@ module type TREE = sig val clear : ?depth:int -> tree -> unit end +module Proof : sig + (** Proofs are compact representations of trees which can be shared + between peers. + + This is expected to be used as follows: + + - A first peer runs a function [f] over a tree [t]. While performing + this computation, it records: the hash of [t] (called [before] + below), the hash of [f t] (called [after] below) and a subset of [t] + which is needed to replay [f] without any access to the first peer's + storage. Once done, all these informations are packed into a proof of + type [t] that is sent to the second peer. + + - The second peer generates an initial tree [t'] from [p] and computes + [f t']. Once done, it compares [t']'s hash and [f t']'s hash to [before] + and [after]. If they match, they know that the result state [f t'] is a + valid context state, without having to have access to the full storage + of the first peer. *) + + (** The type for file and directory names. *) + type step = string + + (** The type for values. *) + type value = bytes + + (** The type of indices for inodes' children. *) + type index = int + + (** The type for hashes. *) + type hash = Context_hash.t + + (** The type for (internal) inode proofs. + + These proofs encode large directories into a tree-like structure. This + reflects irmin-pack's way of representing nodes and computing + hashes (tree-like representations for nodes scales better than flat + representations). + + [length] is the total number of entries in the children of the inode. + It's the size of the "flattened" version of that inode. [length] can be + used to prove the correctness of operations such [Tree.length] and + [Tree.list ~offset ~length] in an efficient way. + + [proofs] contains the children proofs. It is a sparse list of ['a] values. + These values are associated to their index in the list, and the list is + kept sorted in increasing order of indices. ['a] can be a concrete proof + or a hash of that proof. + - In proofs with version 0, inodes have at most 32 proofs + (indexed from 0 to 31). *) + type 'a inode = {length : int; proofs : (index * 'a) list} + + (** The type for inode extenders. + + An extender is a compact representation of a sequence of [inode] which + contain only one child. As for inodes, The ['a] parameter can be a + concrete proof or a hash of that proof. + + If an inode proof contains singleton children [i_0, ..., i_n] such as: + [{length=l; proofs = [ (i_0, {proofs = ... { proofs = [ (i_n, p) ] }})]}], + then it is compressed into the inode extender + [{length=l; segment = [i_0;..;i_n]; proof=p}] sharing the same lenght [l] + and final proof [p]. *) + type 'a inode_extender = {length : int; segment : index list; proof : 'a} + [@@deriving irmin] + + (** The type for compressed and partial Merkle tree proofs. + + Tree proofs do not provide any guarantee with the ordering of + computations. For instance, if two effects commute, they won't be + distinguishable by this kind of proofs. + + [Value v] proves that a value [v] exists in the store. + + [Blinded_value h] proves a value with hash [h] exists in the store. + + [Node ls] proves that a a "flat" node containing the list of files [ls] + exists in the store. + - In proofs with version 0, the length of [ls] is at most 256; + + [Blinded_node h] proves that a node with hash [h] exists in the store. + + [Inode i] proves that an inode [i] exists in the store. + + [Extender e] proves that an inode extender [e] exist in the store. *) + type tree = + | Value of value + | Blinded_value of hash + | Node of (step * tree) list + | Blinded_node of hash + | Inode of inode_tree inode + | Extender of inode_tree inode_extender + + (** The type for inode trees. It is a subset of [tree], limited to nodes. + + [Blinded_inode h] proves that an inode with hash [h] exists in the store. + + [Inode_values ls] is simliar to trees' [Node]. + + [Inode_tree i] is similar to tree's [Inode]. + + [Inode_extender e] is similar to trees' [Extender]. *) + and inode_tree = + | Blinded_inode of hash + | Inode_values of (step * tree) list + | Inode_tree of inode_tree inode + | Inode_extender of inode_tree inode_extender + + (** The type for kinded hashes. *) + type kinded_hash = [`Value of hash | `Node of hash] + + module Stream : sig + (** Stream proofs represent an explicit traversal of a Merle tree proof. + Every element (a node, a value, or a shallow pointer) met is first + "compressed" by shallowing its children and then recorded in the proof. + + As stream proofs directly encode the recursive construction of the + Merkle root hash is slightly simpler to implement: verifier simply + need to hash the compressed elements lazily, without any memory or + choice. + + Moreover, the minimality of stream proofs is trivial to check. + Once the computation has consumed the compressed elements required, + it is sufficient to check that no more compressed elements remain + in the proof. + + However, as the compressed elements contain all the hashes of their + shallow children, the size of stream proofs is larger + (at least double in size in practice) than tree proofs, which only + contains the hash for intermediate shallow pointers. *) + + (** The type for elements of stream proofs. + + [Value v] is a proof that the next element read in the store is the + value [v]. + + [Node n] is a proof that the next element read in the store is the + node [n]. + + [Inode i] is a proof that the next element read in the store is the + inode [i]. + + [Inode_extender e] is a proof that the next element read in the store + is the node extender [e]. *) + type elt = + | Value of value + | Node of (step * kinded_hash) list + | Inode of hash inode + | Inode_extender of hash inode_extender + + (** The type for stream proofs. + + The sequance [e_1 ... e_n] proves that the [e_1], ..., [e_n] are + read in the store in sequence. *) + type t = elt Seq.t + end + + type stream = Stream.t + + (** The type for proofs of kind ['a]. + + A proof [p] proves that the state advanced from [before p] to + [after p]. [state p]'s hash is [before p], and [state p] contains + the minimal information for the computation to reach [after p]. + + [version p] is the proof version, currently only version 0 is supported. + - Proofs with version 0 have top-level nodes of size 256. Whenever a node + has more than 256 entries, it is converted into an inode tree with + an branching factor of 32. *) + type 'a t = { + version : int; + before : kinded_hash; + after : kinded_hash; + state : 'a; + } +end + include VIEW with type key = string list and type value = bytes module Tree : @@ -183,6 +366,60 @@ module Tree : and type value := value and type tree := tree +(** [verify p f] runs [f] in checking mode. [f] is a function that takes a + tree as input and returns a new version of the tree and a result. [p] is a + proof, that is a minimal representation of the tree that contains what [f] + should be expecting. + + Therefore, contrary to trees found in a storage, the contents of the trees + passed to [f] may not be available. For this reason, looking up a value at + some [path] can now produce three distinct outcomes: + - A value [v] is present in the proof [p] and returned : [find tree path] + is a promise returning [Some v]; + - [path] is known to have no value in [tree] : [find tree path] is a + promise returning [None]; and + - [path] is known to have a value in [tree] but [p] does not provide it + because [f] should not need it: [verify] returns an error classifying + [path] as an invalid path (see below). + + The same semantics apply to all operations on the tree [t] passed to [f] + and on all operations on the trees built from [f]. + + The generated tree is the tree after [f] has completed. That tree is + disconnected from any storage (i.e. [index]). It is possible to run + operations on it as long as they don't require loading shallowed subtrees. + + The result is [Error _] if the proof is rejected: + - For tree proofs: when [p.before] is different from the hash of + [p.state]; + - For tree and stream proofs: when [p.after] is different from the hash + of [f p.state]; + - For tree and stream proofs: when [f p.state] tries to access paths + invalid paths in [p.state]; + - For stream proofs: when the proof is not empty once [f] is done. *) +type ('proof, 'result) verifier := + 'proof -> + (tree -> (tree * 'result) Lwt.t) -> + (tree * 'result, [`Msg of string]) result Lwt.t + +(** The type for tree proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, *in some order*. *) +type tree_proof := Proof.tree Proof.t + +(** [verify_tree_proof] is the verifier of tree proofs. *) +val verify_tree_proof : (tree_proof, 'a) verifier + +(** The type for stream proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, in the exact same order. *) +type stream_proof := Proof.stream Proof.t + +(** [verify_stream] is the verifier of stream proofs. *) +val verify_stream_proof : (stream_proof, 'a) verifier + val register_resolver : 'a Base58.encoding -> (t -> string -> 'a list Lwt.t) -> unit diff --git a/src/lib_proxy/light_internal.ml b/src/lib_proxy/light_internal.ml index ae818c913bc714a7b186155bfab5fa06b9809a44..0fae61c74ae5c980e60894168dc6ff0bc3be8b99 100644 --- a/src/lib_proxy/light_internal.ml +++ b/src/lib_proxy/light_internal.ml @@ -68,7 +68,7 @@ module Merkle = struct | Ok hash_str -> let irmin_hash = match kind with - | Contents -> `Contents hash_str + | Contents -> `Value hash_str | Node -> `Node hash_str in return @@ Store.Tree.shallow repo irmin_hash) diff --git a/src/lib_proxy/local_context.ml b/src/lib_proxy/local_context.ml index 1c8466b98f4a3243ade7695185f099279e5f0180..db4148e8ae88ee083a367b5f70c7d06365aaa0db 100644 --- a/src/lib_proxy/local_context.ml +++ b/src/lib_proxy/local_context.ml @@ -31,7 +31,7 @@ let store_empty = empty let shallow_of_tree repo tree = let h = Tree.hash tree in let hash = - match Tree.kind tree with `Tree -> `Node h | `Value -> `Contents h + match Tree.kind tree with `Tree -> `Node h | `Value -> `Value h in Tree.shallow repo hash diff --git a/src/lib_proxy/test/test_fuzzing_light.ml b/src/lib_proxy/test/test_fuzzing_light.ml index 12ebf797e77dff40fc7604d67216c1eb1d1e9913..faab761e02c6c8922ccf33b6ca04e0868f897932 100644 --- a/src/lib_proxy/test/test_fuzzing_light.ml +++ b/src/lib_proxy/test/test_fuzzing_light.ml @@ -353,7 +353,7 @@ module HashStability = struct let hash = Store.Tree.hash tree in let data = match Store.Tree.kind tree with - | `Value -> `Contents hash + | `Value -> `Value hash | `Tree -> `Node hash in Store.Tree.shallow repo data diff --git a/src/proto_alpha/lib_protocol/raw_context.ml b/src/proto_alpha/lib_protocol/raw_context.ml index 3b7ca36d4322a4ed11b66d8a4ad5f49fe2a28f9a..edeca90b1bce25f65d796072565eb41a4d4ae30b 100644 --- a/src/proto_alpha/lib_protocol/raw_context.ml +++ b/src/proto_alpha/lib_protocol/raw_context.ml @@ -991,6 +991,8 @@ let list ctxt ?offset ?length k = Context.list (context ctxt) ?offset ?length k let fold ?depth ctxt k ~order ~init ~f = Context.fold ?depth (context ctxt) k ~order ~init ~f +module Proof = Context.Proof + module Tree : Raw_context_intf.TREE with type t := t @@ -1050,6 +1052,10 @@ module Tree : | Some v -> add_tree t k v end +let verify_tree_proof proof f = Context.verify_tree_proof proof f + +let verify_stream_proof proof f = Context.verify_stream_proof proof f + let project x = x let absolute_key _ k = k diff --git a/src/proto_alpha/lib_protocol/raw_context_intf.ml b/src/proto_alpha/lib_protocol/raw_context_intf.ml index dc1817f0a2d637385954e5537b31d03c27529c38..71b0d69996dc363f4eb854e8c0066946204862d4 100644 --- a/src/proto_alpha/lib_protocol/raw_context_intf.ml +++ b/src/proto_alpha/lib_protocol/raw_context_intf.ml @@ -28,22 +28,25 @@ as-is for direct context accesses, and used in {!Storage_functors} to provide restricted views to the context. *) +(** The tree depth of a fold. See the [fold] function for more information. *) +type depth = [`Eq of int | `Le of int | `Lt of int | `Ge of int | `Gt of int] + module type VIEW = sig (* Same as [Environment_context.VIEW] but with extra getters and setters functions. *) - (** The type for context handler. *) + (** The type for context views. *) type t - (** The type for context trees. *) - type tree - (** The type for context keys. *) type key = string list (** The type for context values. *) type value = bytes + (** The type for context trees. *) + type tree + (** {2 Getters} *) (** [mem t k] is an Lwt promise that resolves to [true] iff [k] is bound @@ -148,23 +151,25 @@ module type VIEW = sig (** {2 Folds} *) - (** [fold ?depth t root ~init ~f] recursively folds over the trees + (** [fold ?depth t root ~order ~init ~f] recursively folds over the trees and values of [t]. The [f] callbacks are called with a key relative to [root]. [f] is never called with an empty key for values; i.e., folding over a value is a no-op. - Elements are traversed in lexical order of keys. - The depth is 0-indexed. If [depth] is set (by default it is not), then [f] is only called when the conditions described by the parameter is true: - - [Eq d] folds over nodes and contents of depth exactly [d]. - - [Lt d] folds over nodes and contents of depth strictly less than [d]. - - [Le d] folds over nodes and contents of depth less than or equal to [d]. - - [Gt d] folds over nodes and contents of depth strictly more than [d]. - - [Ge d] folds over nodes and contents of depth more than or equal to [d]. *) + - [Eq d] folds over nodes and values of depth exactly [d]. + - [Lt d] folds over nodes and values of depth strictly less than [d]. + - [Le d] folds over nodes and values of depth less than or equal to [d]. + - [Gt d] folds over nodes and values of depth strictly more than [d]. + - [Ge d] folds over nodes and values of depth more than or equal to [d]. + + If [order] is [`Sorted] (the default), the elements are traversed in + lexicographic order of their keys. For large nodes, it is memory-consuming, + use [`Undefined] for a more memory efficient [fold]. *) val fold : - ?depth:[`Eq of int | `Le of int | `Lt of int | `Ge of int | `Gt of int] -> + ?depth:depth -> t -> key -> order:[`Sorted | `Undefined] -> @@ -173,6 +178,10 @@ module type VIEW = sig 'a Lwt.t end +module Kind = struct + type t = [`Value | `Tree] +end + module type TREE = sig (** [Tree] provides immutable, in-memory partial mirror of the context, with lazy reads and delayed writes. The trees are Merkle @@ -201,9 +210,11 @@ module type TREE = sig (** [kind t] is [t]'s kind. It's either a tree node or a leaf value. *) - val kind : tree -> [`Value | `Tree] + val kind : tree -> Kind.t - (** [to_value t] is [Some v] is [t] is a leaf tree and [None] otherwise. *) + (** [to_value t] is an Lwt promise that resolves to [Some v] if [t] + is a leaf tree and [None] otherwise. It is equivalent to [find t + []]. *) val to_value : tree -> value option Lwt.t (** [hash t] is [t]'s Merkle hash. *) @@ -220,6 +231,182 @@ module type TREE = sig val clear : ?depth:int -> tree -> unit end +module type PROOF = sig + (** Proofs are compact representations of trees which can be shared + between peers. + + This is expected to be used as follows: + + - A first peer runs a function [f] over a tree [t]. While performing + this computation, it records: the hash of [t] (called [before] + below), the hash of [f t] (called [after] below) and a subset of [t] + which is needed to replay [f] without any access to the first peer's + storage. Once done, all these informations are packed into a proof of + type [t] that is sent to the second peer. + + - The second peer generates an initial tree [t'] from [p] and computes + [f t']. Once done, it compares [t']'s hash and [f t']'s hash to [before] + and [after]. If they match, they know that the result state [f t'] is a + valid context state, without having to have access to the full storage + of the first peer. *) + + (** The type for file and directory names. *) + type step = string + + (** The type for values. *) + type value = bytes + + (** The type of indices for inodes' children. *) + type index = int + + (** The type for hashes. *) + type hash = Context_hash.t + + (** The type for (internal) inode proofs. + + These proofs encode large directories into a tree-like structure. This + reflects irmin-pack's way of representing nodes and computing + hashes (tree-like representations for nodes scales better than flat + representations). + + [length] is the total number of entries in the children of the inode. + It's the size of the "flattened" version of that inode. [length] can be + used to prove the correctness of operations such [Tree.length] and + [Tree.list ~offset ~length] in an efficient way. + + [proofs] contains the children proofs. It is a sparse list of ['a] values. + These values are associated to their index in the list, and the list is + kept sorted in increasing order of indices. ['a] can be a concrete proof + or a hash of that proof. + - In proofs with version 0, inodes have at most 32 proofs + (indexed from 0 to 31). *) + type 'a inode = {length : int; proofs : (index * 'a) list} + + (** The type for inode extenders. + + An extender is a compact representation of a sequence of [inode] which + contain only one child. As for inodes, The ['a] parameter can be a + concrete proof or a hash of that proof. + + If an inode proof contains singleton children [i_0, ..., i_n] such as: + [{length=l; proofs = [ (i_0, {proofs = ... { proofs = [ (i_n, p) ] }})]}], + then it is compressed into the inode extender + [{length=l; segment = [i_0;..;i_n]; proof=p}] sharing the same lenght [l] + and final proof [p]. *) + type 'a inode_extender = {length : int; segment : index list; proof : 'a} + [@@deriving irmin] + + (** The type for compressed and partial Merkle tree proofs. + + Tree proofs do not provide any guarantee with the ordering of + computations. For instance, if two effects commute, they won't be + distinguishable by this kind of proofs. + + [Value v] proves that a value [v] exists in the store. + + [Blinded_value h] proves a value with hash [h] exists in the store. + + [Node ls] proves that a a "flat" node containing the list of files [ls] + exists in the store. + - In proofs with version 0, the length of [ls] is at most 256; + + [Blinded_node h] proves that a node with hash [h] exists in the store. + + [Inode i] proves that an inode [i] exists in the store. + + [Extender e] proves that an inode extender [e] exist in the store. *) + type tree = + | Value of value + | Blinded_value of hash + | Node of (step * tree) list + | Blinded_node of hash + | Inode of inode_tree inode + | Extender of inode_tree inode_extender + + (** The type for inode trees. It is a subset of [tree], limited to nodes. + + [Blinded_inode h] proves that an inode with hash [h] exists in the store. + + [Inode_values ls] is simliar to trees' [Node]. + + [Inode_tree i] is similar to tree's [Inode]. + + [Inode_extender e] is similar to trees' [Extender]. *) + and inode_tree = + | Blinded_inode of hash + | Inode_values of (step * tree) list + | Inode_tree of inode_tree inode + | Inode_extender of inode_tree inode_extender + + (** The type for kinded hashes. *) + type kinded_hash = [`Value of hash | `Node of hash] + + module Stream : sig + (** Stream proofs represent an explicit traversal of a Merle tree proof. + Every element (a node, a value, or a shallow pointer) met is first + "compressed" by shallowing its children and then recorded in the proof. + + As stream proofs directly encode the recursive construction of the + Merkle root hash is slightly simpler to implement: verifier simply + need to hash the compressed elements lazily, without any memory or + choice. + + Moreover, the minimality of stream proofs is trivial to check. + Once the computation has consumed the compressed elements required, + it is sufficient to check that no more compressed elements remain + in the proof. + + However, as the compressed elements contain all the hashes of their + shallow children, the size of stream proofs is larger + (at least double in size in practice) than tree proofs, which only + contains the hash for intermediate shallow pointers. *) + + (** The type for elements of stream proofs. + + [Value v] is a proof that the next element read in the store is the + value [v]. + + [Node n] is a proof that the next element read in the store is the + node [n]. + + [Inode i] is a proof that the next element read in the store is the + inode [i]. + + [Inode_extender e] is a proof that the next element read in the store + is the node extender [e]. *) + type elt = + | Value of value + | Node of (step * kinded_hash) list + | Inode of hash inode + | Inode_extender of hash inode_extender + + (** The type for stream proofs. + + The sequance [e_1 ... e_n] proves that the [e_1], ..., [e_n] are + read in the store in sequence. *) + type t = elt Seq.t + end + + type stream = Stream.t + + (** The type for proofs of kind ['a]. + + A proof [p] proves that the state advanced from [before p] to + [after p]. [state p]'s hash is [before p], and [state p] contains + the minimal information for the computation to reach [after p]. + + [version p] is the proof version, currently only version 0 is supported. + - Proofs with version 0 have top-level nodes of size 256. Whenever a node + has more than 256 entries, it is converted into an inode tree with + an branching factor of 32. *) + type 'a t = { + version : int; + before : kinded_hash; + after : kinded_hash; + state : 'a; + } +end + module type T = sig (** The type for root contexts. *) type root @@ -233,6 +420,62 @@ module type T = sig and type value := value and type tree := tree + module Proof : PROOF + + (** [verify p f] runs [f] in checking mode. [f] is a function that takes a + tree as input and returns a new version of the tree and a result. [p] is a + proof, that is a minimal representation of the tree that contains what [f] + should be expecting. + + Therefore, contrary to trees found in a storage, the contents of the trees + passed to [f] may not be available. For this reason, looking up a value at + some [path] can now produce three distinct outcomes: + - A value [v] is present in the proof [p] and returned : [find tree path] + is a promise returning [Some v]; + - [path] is known to have no value in [tree] : [find tree path] is a + promise returning [None]; and + - [path] is known to have a value in [tree] but [p] does not provide it + because [f] should not need it: [verify] returns an error classifying + [path] as an invalid path (see below). + + The same semantics apply to all operations on the tree [t] passed to [f] + and on all operations on the trees built from [f]. + + The generated tree is the tree after [f] has completed. That tree is + disconnected from any storage (i.e. [index]). It is possible to run + operations on it as long as they don't require loading shallowed subtrees. + + The result is [Error _] if the proof is rejected: + - For tree proofs: when [p.before] is different from the hash of + [p.state]; + - For tree and stream proofs: when [p.after] is different from the hash + of [f p.state]; + - For tree and stream proofs: when [f p.state] tries to access paths + invalid paths in [p.state]; + - For stream proofs: when the proof is not empty once [f] is done. *) + type ('proof, 'result) verifier := + 'proof -> + (tree -> (tree * 'result) Lwt.t) -> + (tree * 'result, [`Msg of string]) result Lwt.t + + (** The type for tree proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, *in some order*. *) + type tree_proof := Proof.tree Proof.t + + (** [verify_tree_proof] is the verifier of tree proofs. *) + val verify_tree_proof : (tree_proof, 'a) verifier + + (** The type for stream proofs. + + Guarantee that the given computation performs exactly the same state + operations as the generating computation, in the exact same order. *) + type stream_proof := Proof.stream Proof.t + + (** [verify_stream] is the verifier of stream proofs. *) + val verify_stream_proof : (stream_proof, 'a) verifier + (** Internally used in {!Storage_functors} to escape from a view. *) val project : t -> root diff --git a/src/proto_alpha/lib_protocol/storage_functors.ml b/src/proto_alpha/lib_protocol/storage_functors.ml index 3b3a257f3c68ea075dca010de303c3a5de6f9de9..1b15d18dddcc8ee8331a16afddc9e3d10fa2ae8d 100644 --- a/src/proto_alpha/lib_protocol/storage_functors.ml +++ b/src/proto_alpha/lib_protocol/storage_functors.ml @@ -113,6 +113,11 @@ module Make_subcontext (R : REGISTER) (C : Raw_context.T) (N : NAME) : C.fold ?depth t (to_key k) ~order ~init ~f module Tree = C.Tree + module Proof = C.Proof + + let verify_tree_proof = C.verify_tree_proof + + let verify_stream_proof = C.verify_stream_proof let project = C.project @@ -784,6 +789,12 @@ module Make_indexed_subcontext (C : Raw_context.T) (I : INDEX) : C.Tree.empty t end + module Proof = C.Proof + + let verify_tree_proof = C.verify_tree_proof + + let verify_stream_proof = C.verify_stream_proof + let project c = let (t, _) = unpack c in C.project t