From 4170d640b00ab8b79dbb84ab7a3ae0dbec917caa Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Wed, 30 Jan 2019 08:08:20 +0100 Subject: [PATCH 1/5] irmin-lmdb/GC: add a generational, stop-the-world GC Use `tezos-node run --gc` to enable the GC on startup. The GC will look for the last checkpoint on every chain and will promote the contexts which are alive to a new lmdb database; once the GC is complete, the lmdb databases are atomically swapped. --- src/bin_node/node_run_command.ml | 72 ++- src/lib_shell/state.ml | 1 + src/lib_storage/context.ml | 21 +- src/lib_storage/context.mli | 4 + src/lib_storage/test/test_context.ml | 1 + vendors/irmin-lmdb/dune | 2 +- vendors/irmin-lmdb/irmin-lmdb.opam | 2 + vendors/irmin-lmdb/irmin_lmdb.ml | 718 ++++++++++++++++++--------- vendors/irmin-lmdb/irmin_lmdb.mli | 34 +- vendors/irmin-lmdb/test/dune | 3 + vendors/irmin-lmdb/test/test.ml | 131 +++++ 11 files changed, 750 insertions(+), 239 deletions(-) create mode 100644 vendors/irmin-lmdb/test/dune create mode 100644 vendors/irmin-lmdb/test/test.ml diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index 578814edfee2..eec731289631 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -233,7 +233,59 @@ let init_signal () = ignore (Lwt_unix.on_signal Sys.sigint (handler "INT") : Lwt_unix.signal_handler_id) ; ignore (Lwt_unix.on_signal Sys.sigterm (handler "TERM") : Lwt_unix.signal_handler_id) -let run ?verbosity ?sandbox ?checkpoint (config : Node_config_file.t) = +let known_heads store chain_id = + let chain = Store.Chain.get store chain_id in + Store.Chain_data.(Known_heads.read_all @@ get chain) + +let predecessors store max_level head = + let rec aux acc hash = + Store.Block.Header.read_exn (store, hash) >>= fun h -> + if h.shell.level < max_level then Lwt.return acc + else + Store.Block.Predecessors.read_exn (store, hash) 0 >>= fun pred -> + let acc = Context_hash.Set.add h.shell.context acc in + aux acc pred + in + aux Context_hash.Set.empty head + +let genesis_block store id = + let store = Store.Chain.get store id in + Store.Chain.Genesis_hash.read_exn store >>= fun genesis -> + Store.Block.Header.read_exn (Store.Block.get store, genesis) >|= fun h -> + h.shell.context + +let run_gc (config : Node_config_file.t) checkpoint = + Store.init ~mapsize:4_000_000_000_000L (store_dir config.data_dir) + >>= function + | Error _e -> Lwt.fail_with "Store.init" (* XXX: use the error monad *) + | Ok store -> + let alive_in_chain acc id = + genesis_block store id >>= fun genesis -> + known_heads store id >>= fun heads -> + let chain_t = Store.Chain.get store id in + let block_t = Store.Block.get chain_t in + let chain_data_t = Store.Chain_data.get chain_t in + (match checkpoint with + | Some (n, _) -> Lwt.return n + | None -> Store.Chain_data.Checkpoint.read_exn chain_data_t >|= fst) + >>= fun max_level -> + Lwt_list.fold_left_s (fun acc head -> + predecessors block_t max_level head >|= fun hashes -> + Context_hash.Set.union acc hashes + ) acc (Block_hash.Set.elements heads) + >|= fun hashes -> + (* Always promote the genesis block *) + Context_hash.Set.add genesis hashes + in + Store.Chain.list store >>= fun ids -> + Lwt_list.fold_left_s alive_in_chain Context_hash.Set.empty ids + >>= fun roots -> + Tezos_storage.Context.init + ~mapsize:4_000_000_000_000L (context_dir config.data_dir) + >>= fun context -> + Tezos_storage.Context.gc context ~roots + +let run ?verbosity ?sandbox ?checkpoint ~gc (config : Node_config_file.t) = Node_data_version.ensure_data_dir config.data_dir >>=? fun () -> Lwt_lock_file.create ~unlink_on_exit:true (lock_file config.data_dir) >>=? fun () -> @@ -243,6 +295,7 @@ let run ?verbosity ?sandbox ?checkpoint (config : Node_config_file.t) = | None -> config.log | Some default_level -> { config.log with default_level } in Logging_unix.init ~cfg:log_cfg () >>= fun () -> + (if not gc then Lwt.return () else run_gc config checkpoint) >>= fun () -> Updater.init (protocol_dir config.data_dir) ; lwt_log_notice "Starting the Tezos node..." >>= fun () -> init_node ?sandbox ?checkpoint config >>=? fun node -> @@ -257,7 +310,7 @@ let run ?verbosity ?sandbox ?checkpoint (config : Node_config_file.t) = Logging_unix.close () >>= fun () -> return_unit -let process sandbox verbosity checkpoint args = +let process sandbox verbosity checkpoint gc args = let verbosity = match verbosity with | [] -> None @@ -305,7 +358,7 @@ let process sandbox verbosity checkpoint args = (lock_file config.data_dir) >>=? function | false -> Lwt.catch - (fun () -> run ?sandbox ?verbosity ?checkpoint config) + (fun () -> run ?sandbox ?verbosity ?checkpoint ~gc config) (function |Unix.Unix_error(Unix.EADDRINUSE, "bind","") -> begin match config.rpc.listen_addr with @@ -358,8 +411,19 @@ module Term = struct info ~docs:Node_shared_arg.Manpage.misc_section ~doc ~docv:"," ["checkpoint"]) + let gc = + let open Cmdliner in + let doc = + "Run the GC and exit. Use 0 to use the last-fork point, \ + otherwise keep the latest $(b, N) contexts." + in + Arg.(value + & flag + & info ~doc ~docv:"N" ~docs:Node_shared_arg.Manpage.misc_section + ["gc"]) + let term = - Cmdliner.Term.(ret (const process $ sandbox $ verbosity $ checkpoint $ + Cmdliner.Term.(ret (const process $ sandbox $ verbosity $ checkpoint $ gc $ Node_shared_arg.Term.args)) end diff --git a/src/lib_shell/state.ml b/src/lib_shell/state.ml index 7f3b9d69671e..c5eeed351c99 100644 --- a/src/lib_shell/state.ml +++ b/src/lib_shell/state.ml @@ -1302,6 +1302,7 @@ let init return (state, main_chain_state, context_index) let close { global_data } = + Context.close () >>= fun () -> Shared.use global_data begin fun { global_store } -> Store.close global_store ; Lwt.return_unit diff --git a/src/lib_storage/context.ml b/src/lib_storage/context.ml index 6db05a472fc3..a3099394c1d6 100644 --- a/src/lib_storage/context.ml +++ b/src/lib_storage/context.ml @@ -340,9 +340,24 @@ let hash ~time ?(message = "") context = Lwt.return x let commit ~time ?message context = - raw_commit ~time ?message context >>= fun commit -> - let h = GitStore.Commit.hash commit in - Lwt.return h + raw_commit ~time ?message context >|= fun commit -> + GitStore.Commit.hash commit + +let gc_in_progress = ref None + +let gc index ~roots = + let roots = Context_hash.Set.elements roots in + let switch = Lwt_switch.create () in + gc_in_progress := Some switch; + GitStore.gc ~repo:index.repo ~switch roots >|= fun stats -> + gc_in_progress := None; + Fmt.pr "GC: live objects: %a\n%!" GitStore.pp_stats stats + + +let close () = + match !gc_in_progress with + | None -> Lwt.return () + | Some s -> Lwt_switch.turn_off s (*-- Generic Store Primitives ------------------------------------------------*) diff --git a/src/lib_storage/context.mli b/src/lib_storage/context.mli index 309a0318aa51..f6be70833ca4 100644 --- a/src/lib_storage/context.mli +++ b/src/lib_storage/context.mli @@ -40,6 +40,10 @@ val init: string -> index Lwt.t +val close: unit -> unit Lwt.t + +val gc: index -> roots:Context_hash.Set.t -> unit Lwt.t + val commit_genesis: index -> chain_id:Chain_id.t -> diff --git a/src/lib_storage/test/test_context.ml b/src/lib_storage/test/test_context.ml index 224b70afe042..22cd565d0d71 100644 --- a/src/lib_storage/test/test_context.ml +++ b/src/lib_storage/test/test_context.ml @@ -112,6 +112,7 @@ let wrap_context_init f _ () = create_block3a idx block2 >>= fun block3a -> create_block3b idx block2 >>= fun block3b -> f { idx; genesis; block2 ; block3a; block3b } >>= fun result -> + Context.close () >>= fun () -> Lwt.return result end diff --git a/vendors/irmin-lmdb/dune b/vendors/irmin-lmdb/dune index ab0458feb96d..aa9e9066612e 100644 --- a/vendors/irmin-lmdb/dune +++ b/vendors/irmin-lmdb/dune @@ -1,5 +1,5 @@ (library (name irmin_lmdb) (public_name irmin-lmdb) - (libraries irmin lmdb) + (libraries irmin lmdb hashset lwt.unix ocplib-endian) (flags (:standard -safe-string))) diff --git a/vendors/irmin-lmdb/irmin-lmdb.opam b/vendors/irmin-lmdb/irmin-lmdb.opam index 4a0d64f0c990..65df13053491 100644 --- a/vendors/irmin-lmdb/irmin-lmdb.opam +++ b/vendors/irmin-lmdb/irmin-lmdb.opam @@ -15,6 +15,8 @@ run-test: ["dune" "runtest" "-p" name] depends: [ "dune" {build & >= "1.0.1"} + "ocplib-endian" {>= "1.0"} + "hashset" "irmin" {>= "1.4.0"} "lmdb" {>= "0.1"} ] diff --git a/vendors/irmin-lmdb/irmin_lmdb.ml b/vendors/irmin-lmdb/irmin_lmdb.ml index 276e29f9fa4b..a7eba25d1cde 100644 --- a/vendors/irmin-lmdb/irmin_lmdb.ml +++ b/vendors/irmin-lmdb/irmin_lmdb.ml @@ -22,6 +22,11 @@ module Option = struct let value_map ~default ~f = function | None -> default | Some v -> f v + + let get = function + | Some v -> v + | None -> failwith "no value" + end module Result = struct @@ -30,24 +35,33 @@ module Result = struct | Error err -> Error err end -let cstruct_of_ba_copy ba = - let cs = Cstruct.of_bigarray ba in - let len = Cstruct.len cs in - let cs_copy = Cstruct.create_unsafe len in - Cstruct.blit cs 0 cs_copy 0 len ; - cs_copy - open Lwt.Infix +type wtxn = Lmdb.rw Lmdb.txn * Lmdb.db + +(* The GC has 3 modes: + - normal: all reads and writes are done normally on the main database file. + - promotion: a (concurrent) promotion to a different database file is in + progresss. + - pivot: eg. "stop-the-world" all the operations are stopped, the database + files are swapped on disk. *) + +type mode = + | Normal + | Promotion + | Pivot + type t = { - db: Lmdb.t ; root: string ; - mutable wtxn: (Lmdb.rw Lmdb.txn * Lmdb.db) option; + readonly: bool; + mutable db: Lmdb.t ; + mutable gc_mode: mode; + mutable wtxn: wtxn option; } -let of_result = function - | Ok v -> Lwt.return v - | Error err -> Lwt.fail_with (Lmdb.string_of_error err) +let of_result op = function + | Ok v -> Lwt.return v + | Error err -> Fmt.kstrf Lwt.fail_with "%s: %a" op Lmdb.pp_error err let (|>>) v f = match v with @@ -63,27 +77,6 @@ let get_wtxn db = db.wtxn <- Some (txn, ddb); Ok (txn, ddb) -let commit_wtxn db = - match db.wtxn with - | None -> Ok () - | Some (t, _ddb) -> - db.wtxn <- None; - Lmdb.commit_txn t - -let add db k v = - get_wtxn db |>> fun (txn, ddb) -> - Lmdb.put_string txn ddb k v - -let add db k v = - of_result @@ add db k v - -let add_cstruct db k v = - get_wtxn db |>> fun (txn, ddb) -> - Lmdb.put txn ddb k (Cstruct.to_bigarray v) - -let add_cstruct db k v = - of_result @@ add_cstruct db k v - let src = Logs.Src.create "irmin.lmdb" ~doc:"Irmin in a Lmdb store" module Log = (val Logs.src_log src : Logs.LOG) @@ -105,8 +98,27 @@ module Conf = struct let root = Irmin.Private.Conf.root let mapsize = Irmin.Private.Conf.key "mapsize" int64_converter 409_600_000_000L - let readonly = - Irmin.Private.Conf.key "readonly" bool_converter false + let readonly = + Irmin.Private.Conf.key "readonly" bool_converter false + + type t = { + root : string ; + mapsize : int64 ; + readonly: bool ; + (* TODO *) + (* ?write_buffer_size:int -> *) + (* ?max_open_files:int -> *) + (* ?block_size:int -> *) + (* ?block_restart_interval:int -> *) + (* ?cache_size:int *) + } + + let of_config c = + let root = Irmin.Private.Conf.get c root in + let mapsize = Irmin.Private.Conf.get c mapsize in + let readonly = Irmin.Private.Conf.get c readonly in + let root = match root with None -> "irmin" | Some root -> root in + { root ; mapsize ; readonly } end @@ -117,7 +129,49 @@ let config let config = C.add config Conf.readonly readonly in Option.value_map mapsize ~default:config ~f:(C.add config Conf.mapsize) -type ('r) reader = { f : 'k. 'k Lmdb.txn -> Lmdb.db -> ('r, Lmdb.error) result } [@@unboxed] +let open_db ~root ~mapsize ~readonly = + if not (Sys.file_exists root) then Unix.mkdir root 0o755; + let flags = if readonly then [ Lmdb.RdOnly ] else [] in + let sync_flag = + match Sys.getenv_opt "TEZOS_CONTEXT_SYNC" with + | None -> [] + | Some s -> + match String.lowercase_ascii s with + | "nosync" -> [ Lmdb.NoSync ] + | "nometasync" -> [ Lmdb.NoMetaSync ] + | _ -> + Printf.eprintf "Unrecognized TEZOS_SYNC option : %s\n\ + allowed: nosync nometasync" s; + [] + in + let flags = sync_flag @ Lmdb.NoRdAhead :: Lmdb.NoTLS :: flags in + let file_flags = if readonly then 0o444 else 0o644 in + match Lmdb.opendir ~mapsize ~flags root file_flags with + | Ok db -> db + | Error err -> + Fmt.failwith "open {%s} %a" (Filename.basename root) Lmdb.pp_error err + +let dbs = Hashtbl.create 3 + +let make conf = + let { Conf.root ; mapsize ; readonly } = Conf.of_config conf in + try Hashtbl.find dbs (root, readonly) + with Not_found -> + let db = open_db ~root ~mapsize ~readonly in + let db = { + db; root; readonly; + gc_mode = Normal; + wtxn = None; + } in + Hashtbl.add dbs (root, readonly) db; + db + +let close t = + Hashtbl.remove dbs (t.root, t.readonly); + Lmdb.closedir t.db + +type ('r) reader = + { f : 'k. 'k Lmdb.txn -> Lmdb.db -> ('r, Lmdb.error) result } [@@unboxed] let with_read_db db ~f = match db.wtxn with @@ -126,51 +180,111 @@ let with_read_db db ~f = | Some (txn, ddb) -> f.f txn ddb -let mem db k = - with_read_db db ~f:{ f = fun txn db -> Lmdb.mem txn db k } |> - of_result +let get txn db k = + Result.map ~f:Cstruct.of_bigarray (Lmdb.get txn db k) let find_bind db k ~f = - match with_read_db db ~f:{ f = fun txn db -> Result.map ~f (Lmdb.get txn db k) } with - | Error KeyNotFound -> Lwt.return_none - | Error err -> Lwt.fail_with (Lmdb.string_of_error err) - | Ok v -> Lwt.return v + match + with_read_db db ~f:{ f = fun txn db -> Result.map ~f (get txn db k) } + with + | Error KeyNotFound -> Ok None + | Error err -> Error err + | Ok v -> Ok v + +module Raw = struct + + let mem db k = + with_read_db db ~f:{ f = fun txn db -> Lmdb.mem txn db k } + |> of_result "mem" + + let find db key of_ba = + find_bind db key ~f:(fun v -> Option.of_result (of_ba v)) + |> of_result "find" + + let add_string db k v = + (get_wtxn db |>> fun (txn, ddb) -> + Lmdb.put_string txn ddb k v) + |> of_result "add_string" + + let add_cstruct db k v = + (get_wtxn db |>> fun (txn, ddb) -> + Lmdb.put txn ddb k (Cstruct.to_bigarray v)) + |> of_result "add_ba" + + let add db k = function + | `String v -> add_string db k v + | `Cstruct v -> add_cstruct db k v + + let remove db k = + (get_wtxn db |>> fun (txn, ddb) -> + match Lmdb.del txn ddb k with + | Ok () | Error Lmdb.KeyNotFound -> Ok () + | x -> x) + |> of_result "remove" + + let commit op db = + (match db.wtxn with + | None -> Ok () + | Some (t, _ddb) -> + db.wtxn <- None; + Lmdb.commit_txn t) + |> of_result op + + let fsync db = + Lmdb.sync ~force:true db.db + |> of_result "fsync" -module Irmin_value_store - (M: Irmin.Metadata.S) - (H: Irmin.Hash.S) - (C: Irmin.Contents.S) - (P: Irmin.Path.S) = struct +end - module XContents = struct +module AO (K: Irmin.Hash.S) (V: Irmin.Contents.S0) (Conv: sig + val of_key: K.t -> string + val to_value: Cstruct.t -> (V.t, [`Msg of string]) result + val of_value: V.t -> [`String of string | `Cstruct of Cstruct.t] + val digest: V.t -> K.t + end) = struct - type nonrec t = t - type key = H.t - type value = C.t + include Conv - let lmdb_of_key h = - "contents/" ^ Cstruct.to_string (H.to_raw h) + type nonrec t = t + type key = K.t + type value = V.t - let mem db key = - let key = lmdb_of_key key in - mem db key + let mem db key = + Raw.mem db (Conv.of_key key) - let find db key = - let key = lmdb_of_key key in - find_bind db key ~f:begin fun v -> - Option.of_result (C.of_string Cstruct.(to_string (of_bigarray v))) - end + let unsafe_find db key = + Raw.find db (Conv.of_key key) @@ fun v -> + Conv.to_value v - let to_string = Fmt.to_to_string C.pp + let find db key = + unsafe_find db key - let add db v = - let k = H.digest C.t v in - let k_lmdb = lmdb_of_key k in - let v = to_string v in - add db k_lmdb v >|= fun () -> k + let unsafe_add db v = + let k = Conv.digest v in + let v = Conv.of_value v in + Raw.add db (Conv.of_key k) v >|= fun () -> + k + + let add db v = + unsafe_add db v + +end + +module Irmin_value_store + (M: Irmin.Metadata.S) + (H: Irmin.Hash.S) + (C: Irmin.Contents.S) + (P: Irmin.Path.S) = struct + module XContents = struct module Val = C module Key = H + include AO (Key) (Val) (struct + let of_key h = "contents/" ^ Cstruct.to_string (Key.to_raw h) + let to_value v = Val.of_string (Cstruct.(to_string v)) + let of_value s = `String (Fmt.to_to_string Val.pp s) + let digest v = Key.digest Val.t v + end) end module Contents = Irmin.Contents.Store(XContents) @@ -401,36 +515,25 @@ module Irmin_value_store let t = Irmin.Type.like N.t of_n to_n end - module AO = struct + include AO (Key) (Val) (struct - type nonrec t = t - type key = H.t - type value = Val.t + let of_key h = "node/" ^ Cstruct.to_string (H.to_raw h) - let lmdb_of_key h = - "node/" ^ Cstruct.to_string (H.to_raw h) + let to_value v = + Irmin.Type.decode_cstruct (Irmin.Type.list Val.entry_t) v - let mem db key = - let key = lmdb_of_key key in - mem db key + let of_value v = + let c = Irmin.Type.encode_cstruct (Irmin.Type.list Val.entry_t) v in + `Cstruct c - let of_cstruct v = - Irmin.Type.decode_cstruct (Irmin.Type.list Val.entry_t) v |> - Option.of_result + let digest v = + let v = Irmin.Type.encode_cstruct (Irmin.Type.list Val.entry_t) v in + H.digest Irmin.Type.cstruct v - let find db key = - let key = lmdb_of_key key in - find_bind db key ~f:(fun v -> of_cstruct (cstruct_of_ba_copy v)) - - let add db v = - let v = Irmin.Type.encode_cstruct (Irmin.Type.list Val.entry_t) v in - let k = H.digest Irmin.Type.cstruct v in - let k_lmdb = lmdb_of_key k in - add_cstruct db k_lmdb v >|= fun () -> k - end - include AO + end) end + module Node = Irmin.Private.Node.Store(Contents)(P)(M)(XNode) module XCommit = struct @@ -464,36 +567,19 @@ module Irmin_value_store module Key = H - module AO = struct - - let lmdb_of_key h = - "commit/" ^ Cstruct.to_string (H.to_raw h) - - type nonrec t = t - type key = H.t - type value = Val.t - - let mem db key = - let key = lmdb_of_key key in - mem db key - - let of_cstruct v = - Irmin.Type.decode_cstruct Val.t v |> - Option.of_result - - let find db key = - let key = lmdb_of_key key in - find_bind db key ~f:(fun v -> of_cstruct (cstruct_of_ba_copy v)) - - let add db v = - let v = Irmin.Type.encode_cstruct Val.t v in - let k = H.digest Irmin.Type.cstruct v in - let k_lmdb = lmdb_of_key k in - add_cstruct db k_lmdb v >>= fun () -> - of_result @@ commit_wtxn db >|= fun () -> k + include AO (Key) (Val) (struct + let of_key h = "commit/" ^ Cstruct.to_string (H.to_raw h) + let of_value v = `Cstruct (Irmin.Type.encode_cstruct Val.t v) + let to_value v = Irmin.Type.decode_cstruct Val.t v + let digest v = + let v = Irmin.Type.encode_cstruct Val.t v in + H.digest Irmin.Type.cstruct v + end) - end - include AO + let add db v = + add db v >>= fun k -> + Raw.commit "Commit.add" db >|= fun () -> + k end module Commit = Irmin.Private.Commit.Store(Node)(XCommit) @@ -523,49 +609,37 @@ module Irmin_branch_store (B: Branch) (H: Irmin.Hash.S) = struct module Val = H module W = Irmin.Private.Watch.Make(Key)(Val) + module L = Irmin.Private.Lock.Make(B) type nonrec t = { db: t; w: W.t; + l: L.t; } let watches = Hashtbl.create 10 type key = Key.t type value = Val.t - type watch = W.watch * (unit -> unit Lwt.t) + type watch = W.watch - (* let branch_of_lmdb r = *) - (* let str = String.trim @@ Git.Reference.to_raw r in *) - (* match B.of_ref str with *) - (* | Ok r -> Some r *) - (* | Error (`Msg _) -> None *) - - let lmdb_of_branch r = Fmt.to_to_string B.pp_ref r + let lmdb_of_branch r = "refs/" ^ Fmt.to_to_string B.pp_ref r + let hash_of_lmdb v = H.of_raw v + let lmdb_of_hash r = H.to_raw r let mem db r = - mem db.db (lmdb_of_branch r) - - let find db r = - find_bind db.db (lmdb_of_branch r) - ~f:(fun v -> Some (H.of_raw (cstruct_of_ba_copy v))) + Raw.mem db.db (lmdb_of_branch r) - let listen_dir _ = - Lwt.return (fun () -> Lwt.return_unit) + let unsafe_find db r = + Raw.find db.db (lmdb_of_branch r) (fun x -> Ok (hash_of_lmdb x)) - let watch_key t key ?init f = - listen_dir t >>= fun stop -> - W.watch_key t.w key ?init f >|= fun w -> - (w, stop) + let find db r = + unsafe_find db r - let watch t ?init f = - listen_dir t >>= fun stop -> - W.watch t.w ?init f >|= fun w -> - (w, stop) + let watch_key t key ?init f = W.watch_key t.w key ?init f - let unwatch t (w, stop) = - stop () >>= fun () -> - W.unwatch t.w w + let watch t ?init f = W.watch t.w ?init f + let unwatch t w = W.unwatch t.w w let v db (* ~head *) = let w = @@ -576,48 +650,53 @@ module Irmin_branch_store (B: Branch) (H: Irmin.Hash.S) = struct Hashtbl.add watches db.root w; w in - Lwt.return { db ; w } - - let list _ = Lwt.return_nil (* TODO, or not *) - - (* let write_index _t _gr _gk = *) - (* Lwt.return_unit *) - - let set _t r _k = - Log.debug (fun f -> f "update %a" B.pp r); - Lwt.return_unit - (* let gr = git_of_branch r in *) - (* let gk = git_of_commit k in *) - (* G.write_reference t.t gr gk >>= fun () -> *) - (* W.notify t.w r (Some k) >>= fun () -> *) - (* write_index t gr (Git.Hash.to_commit gk) *) - - let remove _t r = + let l = L.v () in + Lwt.return { db ; w; l } + + let list _ = + (* CR(samoht): normally this should return the references, but + Tezos don't use that function, so just skip it. *) + Lwt.return_nil (* TODO, or not *) + + let set_unsafe t r k = + let r = lmdb_of_branch r in + let k = lmdb_of_hash k in + Raw.add_cstruct t.db r k + + let set t r k = + Log.debug (fun f -> f "set %a" B.pp r); + L.with_lock t.l r @@ fun () -> + set_unsafe t r k >>= fun () -> + Raw.commit "set" t.db + + let remove_unsafe t r = + let r = lmdb_of_branch r in + Raw.remove t.db r + + let remove t r = Log.debug (fun f -> f "remove %a" B.pp r); - Lwt.return_unit - (* G.remove_reference t.t (git_of_branch r) >>= fun () -> *) - (* W.notify t.w r None *) + L.with_lock t.l r @@ fun () -> + remove_unsafe t r >>= fun () -> + Raw.commit "remove" t.db - let test_and_set _t _r ~test:_ ~set:_ = + let eq_hashes = Irmin.Type.equal H.t + + let test_and_set t r ~test ~set = Log.debug (fun f -> f "test_and_set"); - Lwt.return_true - (* let gr = git_of_branch r in *) - (* let c = function None -> None | Some h -> Some (git_of_commit h) in *) - (* G.test_and_set_reference t.t gr ~test:(c test) ~set:(c set) >>= fun b -> *) - (* (if b then W.notify t.w r set else Lwt.return_unit) >>= fun () -> *) - (* begin *) - (* We do not protect [write_index] because it can take a log - time and we don't want to hold the lock for too long. Would - be safer to grab a lock, although the expanded filesystem - is not critical for Irmin consistency (it's only a - convenience for the user). *) - (* if b then match set with *) - (* | None -> Lwt.return_unit *) - (* | Some v -> write_index t gr (Git.Hash.to_commit (git_of_commit v)) *) - (* else *) - (* Lwt.return_unit *) - (* end >|= fun () -> *) - (* b *) + L.with_lock t.l r @@ fun () -> + find t r >>= fun v -> + let set () = + (match set with + | None -> remove_unsafe t r + | Some v -> set_unsafe t r v) + >>= fun () -> + Raw.commit "test_and_set" t.db >|= fun () -> + true + in + match test, v with + | None , None -> set () + | Some v, Some v'-> if eq_hashes v v' then set () else Lwt.return false + | __ -> Lwt.return false end @@ -654,55 +733,234 @@ module Make let node_t t : Node.t = contents_t t, t.db let commit_t t : Commit.t = node_t t, t.db - type config = { - root : string option ; - mapsize : int64 ; - readonly : bool ; - (* TODO *) - (* ?write_buffer_size:int -> *) - (* ?max_open_files:int -> *) - (* ?block_size:int -> *) - (* ?block_restart_interval:int -> *) - (* ?cache_size:int *) - } - - let config c = - let root = Irmin.Private.Conf.get c Conf.root in - let mapsize = Irmin.Private.Conf.get c Conf.mapsize in - let readonly = Irmin.Private.Conf.get c Conf.readonly in - { root ; mapsize ; readonly } - - let v conf = - let { root ; mapsize ; readonly } = config conf in - let root = match root with None -> "irmin.ldb" | Some root -> root in - if not (Sys.file_exists root) then Unix.mkdir root 0o755 ; - let flags = if readonly then [ Lmdb.RdOnly ] else [] in - let sync_flag = - match Sys.getenv_opt "TEZOS_CONTEXT_SYNC" with - | None -> [] - | Some s -> - match String.lowercase_ascii s with - | "nosync" -> [ Lmdb.NoSync ] - | "nometasync" -> [ Lmdb.NoMetaSync ] - | _ -> - Printf.eprintf "Unrecognized TEZOS_SYNC option : %s\n\ - allowed: nosync nometasync" s; - [] - in - let flags = sync_flag @ Lmdb.NoRdAhead :: Lmdb.NoTLS :: flags in - let file_flags = if readonly then 0o444 else 0o644 in - match Lmdb.opendir ~mapsize ~flags root file_flags with - | Error err -> Lwt.fail_with (Lmdb.string_of_error err) - | Ok db -> - let db = { db ; root ; wtxn = None } in - Branch.v db >|= fun branch -> - { db; branch; config = conf } + let v config = + let db = make config in + Branch.v db >|= fun branch -> + { db; branch; config } end end include Irmin.Make_ext(P) -end + type stats = { + mutable promoted_contents: int; + mutable promoted_nodes : int; + mutable promoted_commits: int; + mutable width: int; + mutable depth: int; + } + + let promoted_contents t = t.promoted_contents + let promoted_nodes t = t.promoted_nodes + let promoted_commits t = t.promoted_commits + + let pp_stats ppf t = + Fmt.pf ppf "[%d blobs/%d nodes/%d commits] depth:%d width:%d" + t.promoted_contents + t.promoted_nodes + t.promoted_commits + t.depth + t.width + + let stats () = { + promoted_contents = 0; + promoted_nodes = 0; + promoted_commits = 0; + width = 0; + depth = 0; + } + + (* poor-man GC *) + module Irmin_GC = struct + + let incr_contents s = s.promoted_contents <- s.promoted_contents + 1 + let incr_nodes s = s.promoted_nodes <- s.promoted_nodes + 1 + let incr_commits s = s.promoted_commits <- s.promoted_commits + 1 + let update_width s c = s.width <- max s.width (List.length c) + let update_depth s n = s.depth <- max s.depth n + + module Tbl = Hashset.Make(struct + type t = string + let equal x y = String.equal x y + let hash t = + assert (String.length t > H.digest_size); + EndianString.NativeEndian.get_int64 t (String.length t - 8) + |> Int64.to_int + end) + + type t = { + tbl : Tbl.t; + new_db: P.Contents.t; + old_db: P.Contents.t; + stats : stats; + switch: Lwt_switch.t option; + } + + let close t = + close t.new_db; + close t.old_db + + let new_root repo = + let c = Conf.of_config repo.P.Repo.config in + c.Conf.root ^ ".1" + + let v ?switch repo = + let config = + let root = new_root repo in + Irmin.Private.Conf.add repo.P.Repo.config Conf.root (Some root) + in + let new_db = make config in + let tbl = Tbl.create 10_123 in + let stats = stats () in + { tbl; stats; new_db; old_db = repo.db; switch } + + let promote_val t k v = + Raw.add_cstruct t.new_db k v + + let promote msg t k = + Raw.find t.old_db k (fun x -> Ok x) >>= function + | Some v -> promote_val t k v + | None -> + let k = H.of_raw (Cstruct.of_string k) in + Fmt.failwith "promote %s: cannot promote key %a\n%!" msg H.pp k + + let mem t k = + if Tbl.mem t.tbl k then Lwt.return true + else Raw.mem t.new_db k + + let copy_contents t k = + Lwt_switch.check t.switch; + let k = P.XContents.of_key k in + mem t k >>= function + | true -> Lwt.return () + | false -> + Tbl.add t.tbl k; + incr_contents t.stats; + promote "contents" t k + + let copy_node t k = + let rec aux x = + Lwt_switch.check t.switch; + match x with + | [] -> Lwt.return () + | (ks, _, `Black, _) :: todo -> + promote "node" t ks >>= fun () -> + aux todo + | (ks, k, `Gray , n) :: todo -> + mem t ks >>= function + | true -> aux todo + | false -> + Tbl.add t.tbl ks; + P.XNode.unsafe_find t.old_db k >|= Option.get >>= fun v -> + let children = P.Node.Val.list v in + incr_nodes t.stats; + update_width t.stats children; + update_depth t.stats n; + let todo = ref ((ks, k, `Black, n) :: todo) in + Lwt_list.iter_p (fun (_, c) -> match c with + | `Contents (k, _) -> copy_contents t k + | `Node k -> + let ks = P.XNode.of_key k in + todo := (ks, k, `Gray, n+1) :: !todo; + Lwt.return () + ) children + >>= fun () -> + aux !todo + in + let ks = P.XNode.of_key k in + mem t ks >>= function + | true -> Lwt.return () + | false -> aux [ks, k, `Gray, 0] + + let copy_commit t k = + Lwt_switch.check t.switch; + P.XCommit.unsafe_find t.old_db k >|= Option.get >>= fun v -> + let k = P.XCommit.of_key k in + (* we ignore the parents *) + copy_node t (P.Commit.Val.node v) >>= fun () -> + incr_commits t.stats; + promote "commit" t k + + let root repo = + let c = repo.P.Repo.config in + match Irmin.Private.Conf.get c Conf.root with + | None -> "context" + | Some r -> r + + let pivot ~branches repo t = + let rename () = + let old_data = Filename.concat (root repo) "data.mdb" in + let new_data = Filename.concat (new_root repo) "data.mdb" in + let old_lock = Filename.concat (root repo) "lock.mdb" in + let new_lock = Filename.concat (new_root repo) "lock.mdb" in + Lwt_unix.rename new_data old_data >>= fun () -> + Lwt_unix.unlink new_lock >>= fun () -> + Lwt_unix.unlink old_lock + in + + (* promote the live refs *) + Lwt_list.iter_p (fun br -> + let k = P.Branch.lmdb_of_branch br in + promote "refs" t k + ) branches + >>= fun () -> + + (* fsync *) + Raw.commit "pivot" t.new_db >>= fun () -> + Raw.fsync t.new_db >>= fun () -> + + (* rename *) + close t; + rename () >>= fun () -> + + (* re-open the database *) + P.Repo.v repo.config >|= fun x -> + repo.db.db <- x.db.db -include Conf + end + + let promote_all ~(repo:repo) ?before_pivot ~branches t roots = + repo.db.gc_mode <- Promotion; + let init_time = Unix.gettimeofday () in + let last_time = ref init_time in + Lwt_list.iteri_s (fun i k -> + Irmin_GC.copy_commit t k >>= fun () -> + let current_time = Unix.gettimeofday () in + if current_time -. !last_time > 5. (* print something every 5s *) + || i = 0 || i = List.length roots - 1 + then ( + last_time := current_time; + Fmt.pr "GC: %d min elapsed - %5d/%d %a\n%!" + (int_of_float ((!last_time -. init_time) /. 60.)) + (i+1) (List.length roots) pp_stats t.stats; + (* flush to disk regularly to not hold too much data into RAM *) + if i mod 1000 = 0 then ( + Irmin_GC.Tbl.clear t.tbl; + Raw.commit "flush roots" t.new_db + ) else + Lwt.return () + ) else + Lwt.return (); + ) roots + >>= fun () -> + (match before_pivot with + | None -> Lwt.return () + | Some t -> t () + ) >>= fun () -> + repo.db.gc_mode <- Pivot; + Irmin_GC.pivot ~branches repo t >|= fun () -> + repo.db.gc_mode <- Normal; + t.stats + + let gc ~repo ?before_pivot ?(branches=[]) ?switch roots = + let t, w = Lwt.task () in + Lwt_switch.add_hook switch (fun () -> t); + let gc = Irmin_GC.v ?switch repo in + Lwt.catch + (fun () -> promote_all ~repo ?before_pivot ~branches gc roots) + (function + | Lwt_switch.Off -> Lwt.wakeup w (); Lwt.return gc.stats + | e -> Lwt.fail e) + +end diff --git a/vendors/irmin-lmdb/irmin_lmdb.mli b/vendors/irmin-lmdb/irmin_lmdb.mli index 34a1a3cbdb4f..64511f00f980 100644 --- a/vendors/irmin-lmdb/irmin_lmdb.mli +++ b/vendors/irmin-lmdb/irmin_lmdb.mli @@ -20,4 +20,36 @@ val config: ?config:Irmin.config -> ?mapsize:int64 -> ?readonly:bool -> string -> Irmin.config -module Make : Irmin.S_MAKER +module Make + (M: Irmin.Metadata.S) + (C: Irmin.Contents.S) + (P: Irmin.Path.S) + (B: Irmin.Branch.S) + (H: Irmin.Hash.S): sig + include Irmin.S + with type key = P.t + and type step = P.step + and module Key = P + and type metadata = M.t + and type contents = C.t + and type branch = B.t + and type Commit.Hash.t = H.t + and type Tree.Hash.t = H.t + and type Contents.Hash.t = H.t + + type stats + + val promoted_contents: stats -> int + val promoted_nodes: stats -> int + val promoted_commits: stats -> int + + val pp_stats: stats Fmt.t + + val gc: + repo:Repo.t -> + ?before_pivot:(unit -> unit Lwt.t) -> + ?branches:B.t list -> + ?switch:Lwt_switch.t -> + Commit.hash list -> stats Lwt.t + +end diff --git a/vendors/irmin-lmdb/test/dune b/vendors/irmin-lmdb/test/dune new file mode 100644 index 000000000000..17dfc25abf4a --- /dev/null +++ b/vendors/irmin-lmdb/test/dune @@ -0,0 +1,3 @@ +(executable + (name test) + (libraries irmin-lmdb alcotest-lwt)) diff --git a/vendors/irmin-lmdb/test/test.ml b/vendors/irmin-lmdb/test/test.ml new file mode 100644 index 000000000000..08ffc27bdb42 --- /dev/null +++ b/vendors/irmin-lmdb/test/test.ml @@ -0,0 +1,131 @@ +open Irmin +open Lwt.Infix + +module Store = Irmin_lmdb.Make + (Metadata.None) + (Contents.String) + (Path.String_list) + (Branch.String) + (Hash.SHA1) + +let config = Irmin_lmdb.config "/tmp/irmin-lmdb" + +let date = ref 0L + +let info () = + date := Int64.add !date 1L; + Info.v ~date:!date ~author:"test lmdb" "commit" + +let fill t entries = + Lwt_list.iter_s (fun (k, v) -> Store.set ~info t k v) entries + +let store () = + Store.Repo.v config >>= fun repo -> + Store.Branch.remove repo Store.Branch.master >>= fun () -> + Store.master repo + +let branches = [Store.Branch.master] + +let test_basics _switch () = + let promote_1 () = + store () >>= fun t -> + fill t [ + ["foo"] , "a"; + ["bar";"toto"], "1"; + ["bar";"titi"], "2"; + ] >>= fun () -> + Store.Head.get t >>= fun root -> + Store.gc ~branches ~repo:(Store.repo t) ~keep:1 root >>= fun stats -> + Alcotest.(check int) "1: promoted commits" 1 (Store.promoted_commits stats); + Alcotest.(check int) "1: promoted nodes" 2 (Store.promoted_nodes stats); + Alcotest.(check int) "1: promoted contents" 3 (Store.promoted_contents stats); + Store.find t ["foo"] >>= fun v -> + Alcotest.(check (option string)) "foo" (Some "a") v; + Store.find t ["bar";"toto"] >>= fun v -> + Alcotest.(check (option string)) "bar/toto" (Some "1") v; + Store.find t ["bar";"titi"] >>= fun v -> + Alcotest.(check (option string)) "bar/titi" (Some "2") v; + Store.Commit.parents root >|= function + | [] -> () + | _ -> Alcotest.fail "parent should not exists!" + in + let promote_2 () = + store () >>= fun t -> + fill t [ + ["foo"] , "a"; + ["bar";"toto"], "1"; + ["bar";"titi"], "2"; + ["bar";"titi"], "3"; + ] >>= fun () -> + Store.Head.get t >>= fun root -> + Store.gc ~branches ~repo:(Store.repo t) ~keep:2 root >>= fun stats -> + Alcotest.(check int) "2: promoted commits" 2 (Store.promoted_commits stats); + Alcotest.(check int) "2: promoted nodes" 4 (Store.promoted_nodes stats); + Alcotest.(check int) "2: promoted contents" 4 (Store.promoted_contents stats); + Store.find t ["foo"] >>= fun v -> + Alcotest.(check (option string)) "foo" (Some "a") v; + Store.find t ["bar";"toto"] >>= fun v -> + Alcotest.(check (option string)) "bar/toto" (Some "1") v; + Store.find t ["bar";"titi"] >>= fun v -> + Alcotest.(check (option string)) "bar/titi" (Some "3") v; + Store.Commit.parents root >>= function + | [] -> Alcotest.fail "parent should exist" + | [p] -> + (Store.Commit.parents p >|= function + | [] -> () + | _ -> Alcotest.fail "grand-parents should not exist") + | _ -> Alcotest.fail "too many parents!" + in + promote_1 () >>= + promote_2 + +let test_basics_loop sw () = + let rec loop = function + | 0 -> Lwt.return () + | n -> + test_basics sw () >>= fun () -> + loop (n-1) + in + loop 100 + +let test_concurrency _ () = + store () >>= fun t -> + fill t [ + ["foo"] , "a"; + ["bar";"toto"], "1"; + ["bar";"titi"], "2"; + ["bar";"titi"], "3"; + ] >>= fun () -> + let before_pivot () = + fill t [ + ["bar"; "yo0"], "a"; + ["bar"; "yo1"], "a"; + ["bar"; "yo1"], "b"; + ] >>= fun () -> + (* check that we can read the new values *) + Store.get t ["bar";"yo1"] >|= fun v -> + Alcotest.(check string) "writing to the store still works" "b" v; + in + Store.Head.get t >>= fun root -> + let wait_for_gc, gc = Lwt.task () in + Lwt.async (fun () -> + Store.gc ~branches ~repo:(Store.repo t) ~before_pivot ~keep:2 root >|= fun _stats -> + Lwt.wakeup gc () + ); + wait_for_gc >>= fun () -> + + (* check that the new values are there *) + + Store.get t ["bar";"titi"] >>= fun v -> + Alcotest.(check string) "bar/titi" "3" v; + Store.get t ["bar";"yo1"] >|= fun v -> + Alcotest.(check string) "new values are still here after a pivot" "b" v + +let () = + Alcotest.run "irmin-lmdb" [ + "GC", [ + Alcotest_lwt.test_case "basics" `Quick test_basics; + Alcotest_lwt.test_case "basic loop" `Quick test_basics_loop; + Alcotest_lwt.test_case "concurrency" `Quick test_concurrency; + + ]] -- GitLab From cfcea9b52d376dcc5c4647693bc6592ba6e00110 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Fri, 21 Dec 2018 19:03:41 +0100 Subject: [PATCH 2/5] irmin-lmdb/GC: vendor hashset From: https://github.com/backtracking/hashset commit: f2b4b3bf7d8482cbaf76142162d131a120645a0b --- vendors/irmin-lmdb/dune | 2 +- vendors/irmin-lmdb/hashset.ml | 174 +++++++++++++++++++++++++++++ vendors/irmin-lmdb/hashset.mli | 107 ++++++++++++++++++ vendors/irmin-lmdb/irmin-lmdb.opam | 1 - 4 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 vendors/irmin-lmdb/hashset.ml create mode 100644 vendors/irmin-lmdb/hashset.mli diff --git a/vendors/irmin-lmdb/dune b/vendors/irmin-lmdb/dune index aa9e9066612e..8a5132794f40 100644 --- a/vendors/irmin-lmdb/dune +++ b/vendors/irmin-lmdb/dune @@ -1,5 +1,5 @@ (library (name irmin_lmdb) (public_name irmin-lmdb) - (libraries irmin lmdb hashset lwt.unix ocplib-endian) + (libraries irmin lmdb lwt.unix ocplib-endian) (flags (:standard -safe-string))) diff --git a/vendors/irmin-lmdb/hashset.ml b/vendors/irmin-lmdb/hashset.ml new file mode 100644 index 000000000000..5033100ea2f6 --- /dev/null +++ b/vendors/irmin-lmdb/hashset.ml @@ -0,0 +1,174 @@ +(**************************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file LICENSE. *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(**************************************************************************) + +(* Sets as hash tables. Code adapted from ocaml's Hashtbl. *) + +(* We do dynamic hashing, and resize the table and rehash the elements + when buckets become too long. *) + +type 'a t = + { mutable size: int; (* number of elements *) + mutable data: 'a list array } (* the buckets *) + +let create initial_size = + let s = min (max 1 initial_size) Sys.max_array_length in + { size = 0; data = Array.make s [] } + +let clear h = + for i = 0 to Array.length h.data - 1 do + h.data.(i) <- [] + done; + h.size <- 0 + +let copy h = + { size = h.size; + data = Array.copy h.data } + +let resize hashfun tbl = + let odata = tbl.data in + let osize = Array.length odata in + let nsize = min (2 * osize + 1) Sys.max_array_length in + if nsize <> osize then begin + let ndata = Array.create nsize [] in + let rec insert_bucket = function + [] -> () + | key :: rest -> + insert_bucket rest; (* preserve original order of elements *) + let nidx = (hashfun key) mod nsize in + ndata.(nidx) <- key :: ndata.(nidx) in + for i = 0 to osize - 1 do + insert_bucket odata.(i) + done; + tbl.data <- ndata; + end + +let add h key = + let i = (Hashtbl.hash key) mod (Array.length h.data) in + let bucket = h.data.(i) in + if not (List.mem key bucket) then begin + h.data.(i) <- key :: bucket; + h.size <- succ h.size; + if h.size > Array.length h.data lsl 1 then resize Hashtbl.hash h + end + +let remove h key = + let rec remove_bucket = function + [] -> + [] + | k :: next -> + if k = key + then begin h.size <- pred h.size; next end + else k :: remove_bucket next in + let i = (Hashtbl.hash key) mod (Array.length h.data) in + h.data.(i) <- remove_bucket h.data.(i) + +let mem h key = + List.mem key h.data.((Hashtbl.hash key) mod (Array.length h.data)) + +let cardinal h = + let d = h.data in + let c = ref 0 in + for i = 0 to Array.length d - 1 do + c := !c + List.length d.(i) + done; + !c + +let iter f h = + let d = h.data in + for i = 0 to Array.length d - 1 do + List.iter f d.(i) + done + +let fold f h init = + let rec do_bucket b accu = + match b with + [] -> + accu + | k :: rest -> + do_bucket rest (f k accu) in + let d = h.data in + let accu = ref init in + for i = 0 to Array.length d - 1 do + accu := do_bucket d.(i) !accu + done; + !accu + +(* Functorial interface *) + +module type HashedType = + sig + type t + val equal: t -> t -> bool + val hash: t -> int + end + +module type S = + sig + type elt + type t + val create: int -> t + val clear: t -> unit + val copy: t -> t + val add: t -> elt -> unit + val remove: t -> elt -> unit + val mem : t -> elt -> bool + val cardinal: t -> int + val iter: (elt -> unit) -> t -> unit + val fold: (elt -> 'a -> 'a) -> t -> 'a -> 'a + end + +module Make(H: HashedType): (S with type elt = H.t) = + struct + type elt = H.t + type set = elt t + type t = set + let create = create + let clear = clear + let copy = copy + + let safehash key = (H.hash key) land max_int + + let rec mem_in_bucket key = function + | [] -> false + | x :: r -> H.equal key x || mem_in_bucket key r + + let add h key = + let i = (safehash key) mod (Array.length h.data) in + let bucket = h.data.(i) in + if not (mem_in_bucket key bucket) then begin + h.data.(i) <- key :: bucket; + h.size <- succ h.size; + if h.size > Array.length h.data lsl 1 then resize safehash h + end + + let remove h key = + let rec remove_bucket = function + [] -> + [] + | k :: next -> + if H.equal k key + then begin h.size <- pred h.size; next end + else k :: remove_bucket next in + let i = (safehash key) mod (Array.length h.data) in + h.data.(i) <- remove_bucket h.data.(i) + + let mem h key = + mem_in_bucket key h.data.((safehash key) mod (Array.length h.data)) + + let cardinal = cardinal + + let iter = iter + let fold = fold + end diff --git a/vendors/irmin-lmdb/hashset.mli b/vendors/irmin-lmdb/hashset.mli new file mode 100644 index 000000000000..5d7eb8aba898 --- /dev/null +++ b/vendors/irmin-lmdb/hashset.mli @@ -0,0 +1,107 @@ +(**************************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file LICENSE. *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(**************************************************************************) + +(* This module implements imperative sets as hash tables. + Operations like union, intersection or difference are not provided, + since there is no way to implement them easily (i.e. more easily than + iterating over sets). *) + +(*s Generic interface *) + +type 'a t +(* The type of sets. Elements have type ['a]. *) + +val create : int -> 'a t +(* [Hashset.create n] creates a new, empty set. + For best results, [n] should be on the + order of the expected number of elements that will be in + the set. The internal structure grows as needed, so [n] is just an + initial guess. *) + +val clear : 'a t -> unit +(* Empty a set. *) + +val add : 'a t -> 'a -> unit +(* [Hashset.add s x] adds [x] into the set [s]. *) + +val copy : 'a t -> 'a t +(* Return a copy of the given set. *) + +val mem : 'a t -> 'a -> bool +(* [Hashset.mem s x] checks if [x] belongs to [s]. *) + +val remove : 'a t -> 'a -> unit +(* [Hashset.remove s x] removes [x] from [s]. + It does nothing if [x] does not belong to [s]. *) + +val cardinal : 'a t -> int +(* [Hashset.cardinal s] returns the cardinal of [s]. *) + +val iter : ('a -> unit) -> 'a t -> unit +(* [Hashset.iter f s] applies [f] to all elements in [s]. *) + +val fold : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(* [Hashset.fold f s init] computes + [(f eN ... (f e1 init)...)], + where [e1 ... eN] are the elements in [s]. + The order in which the elements are passed to [f] is unspecified. *) + + +(*s Functorial interface *) + +module type HashedType = + sig + type t + (* The type of the elements. *) + val equal : t -> t -> bool + (* The equality predicate used to compare elements. *) + val hash : t -> int + (* A hashing function on elements. It must be such that if two elements are + equal according to [equal], then they have identical hash values + as computed by [hash]. + Examples: suitable ([equal], [hash]) pairs for arbitrary element + types include + ([(=)], {!Hashset.hash}) for comparing objects by structure, and + ([(==)], {!Hashset.hash}) for comparing objects by addresses + (e.g. for mutable or cyclic keys). *) + end + +(* The input signature of the functor {!Hashset.Make}. *) + +module type S = + sig + type elt + type t + val create : int -> t + val clear : t -> unit + val copy : t -> t + val add : t -> elt -> unit + val remove : t -> elt -> unit + val mem : t -> elt -> bool + val cardinal : t -> int + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + end +(* The output signature of the functor {!Hashset.Make}. *) + +module Make (H : HashedType) : S with type elt = H.t +(* Functor building an implementation of the hashtable structure. + The functor [Hashset.Make] returns a structure containing + a type [elt] of elements and a type [t] of hash sets. + The operations perform similarly to those of the generic + interface, but use the hashing and equality functions + specified in the functor argument [H] instead of generic + equality and hashing. *) + diff --git a/vendors/irmin-lmdb/irmin-lmdb.opam b/vendors/irmin-lmdb/irmin-lmdb.opam index 65df13053491..ea204ca71bab 100644 --- a/vendors/irmin-lmdb/irmin-lmdb.opam +++ b/vendors/irmin-lmdb/irmin-lmdb.opam @@ -16,7 +16,6 @@ run-test: ["dune" "runtest" "-p" name] depends: [ "dune" {build & >= "1.0.1"} "ocplib-endian" {>= "1.0"} - "hashset" "irmin" {>= "1.4.0"} "lmdb" {>= "0.1"} ] -- GitLab From 511ed3b860bd740ceddc55693ac9b49df9c53457 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Fri, 21 Dec 2018 19:09:20 +0100 Subject: [PATCH 3/5] irmin-lmdb/GC: fix upstream compilation warning for hashset --- vendors/irmin-lmdb/hashset.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendors/irmin-lmdb/hashset.ml b/vendors/irmin-lmdb/hashset.ml index 5033100ea2f6..020d07094582 100644 --- a/vendors/irmin-lmdb/hashset.ml +++ b/vendors/irmin-lmdb/hashset.ml @@ -41,7 +41,7 @@ let resize hashfun tbl = let osize = Array.length odata in let nsize = min (2 * osize + 1) Sys.max_array_length in if nsize <> osize then begin - let ndata = Array.create nsize [] in + let ndata = Array.make nsize [] in let rec insert_bucket = function [] -> () | key :: rest -> -- GitLab From 616bb7cb4934a591a9b09b3fda13e2ab813eb148 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Sat, 19 Jan 2019 17:57:51 +0100 Subject: [PATCH 4/5] irmin-lmdb/GC: compress the disk representation of nodes Use Irmin 2.0 compact representation, while keeping the same hash as before (e.g. it is possible to compact the current database without changing the hashes and to keep both v1 and v2 objects in the database) --- vendors/irmin-lmdb/irmin_lmdb.ml | 82 +- vendors/irmin-lmdb/irmin_lmdb.mli | 4 - vendors/irmin-lmdb/irmin_v2_type.ml | 1405 ++++++++++++++++++++++++++ vendors/irmin-lmdb/irmin_v2_type.mli | 144 +++ 4 files changed, 1621 insertions(+), 14 deletions(-) create mode 100644 vendors/irmin-lmdb/irmin_v2_type.ml create mode 100644 vendors/irmin-lmdb/irmin_v2_type.mli diff --git a/vendors/irmin-lmdb/irmin_lmdb.ml b/vendors/irmin-lmdb/irmin_lmdb.ml index a7eba25d1cde..3e3ab95aea89 100644 --- a/vendors/irmin-lmdb/irmin_lmdb.ml +++ b/vendors/irmin-lmdb/irmin_lmdb.ml @@ -325,6 +325,32 @@ module Irmin_value_store |+ field "node" H.t (fun { node ; _ } -> node) |> sealr + let hash_v2_t = + let open Irmin_v2_type in + like ~cli:(H.pp, H.of_string) + (string_of (`Fixed H.digest_size)) + (fun x -> H.of_raw (Cstruct.of_string x)) + (fun x -> Cstruct.to_string (H.to_raw x)) + + let metadata_v2_t = + Irmin_v2_type.(like unit) (fun _ -> M.default) (fun _ -> ()) + + let entry_v2_t = + let open Irmin_v2_type in + record "Tree.entry" + (fun kind name node -> + let kind = + match kind with + | None -> `Node + | Some m -> `Contents m in + { kind ; name ; node } ) + |+ field "kind" (option metadata_v2_t) (function + | { kind = `Node ; _ } -> None + | { kind = `Contents m ; _ } -> Some m) + |+ field "name" (string_of `Int8) (fun { name ; _ } -> name) + |+ field "node" hash_v2_t (fun { node ; _ } -> node) + |> sealr + let value_t = let open Irmin.Type in variant "Tree.value" (fun node contents -> function @@ -515,18 +541,40 @@ module Irmin_value_store let t = Irmin.Type.like N.t of_n to_n end + let v1_t = Irmin.Type.list Val.entry_t + + type entries = { version: int; entries: Val.entry list } + + let v2_t = + let open Irmin_v2_type in + record "entries" (fun v entries -> { version = Char.code v; entries }) + |+ field "version" char (fun t -> Char.chr t.version) + |+ field "entries" (list ~len:`Int16 Val.entry_v2_t) (fun t -> t.entries) + |> sealr + + let version v = match Cstruct.get_uint8 v 0 with + | 0 -> `V1 + | 1 -> `V2 + | n -> Fmt.failwith "Unsuppported node version: %d" n + include AO (Key) (Val) (struct let of_key h = "node/" ^ Cstruct.to_string (H.to_raw h) - let to_value v = - Irmin.Type.decode_cstruct (Irmin.Type.list Val.entry_t) v + let to_value v = match version v with + | `V1 -> Irmin.Type.decode_cstruct v1_t v + | `V2 -> + match Irmin_v2_type.decode_bin v2_t (Cstruct.to_string v) with + | Ok t -> Ok t.entries + | Error _ as e -> e let of_value v = - let c = Irmin.Type.encode_cstruct (Irmin.Type.list Val.entry_t) v in - `Cstruct c + (* always use v2 encoding to write new values *) + let c = Irmin_v2_type.encode_bin v2_t { entries = v; version = 1 } in + `String c let digest v = + (* use v1 encoding for the digest *) let v = Irmin.Type.encode_cstruct (Irmin.Type.list Val.entry_t) v in H.digest Irmin.Type.cstruct v @@ -747,18 +795,16 @@ module Make mutable promoted_contents: int; mutable promoted_nodes : int; mutable promoted_commits: int; + mutable upgraded_nodes : int; mutable width: int; mutable depth: int; } - let promoted_contents t = t.promoted_contents - let promoted_nodes t = t.promoted_nodes - let promoted_commits t = t.promoted_commits - let pp_stats ppf t = - Fmt.pf ppf "[%d blobs/%d nodes/%d commits] depth:%d width:%d" + Fmt.pf ppf "[%d blobs/%d nodes (%d upgrades)/%d commits] depth:%d width:%d" t.promoted_contents t.promoted_nodes + t.upgraded_nodes t.promoted_commits t.depth t.width @@ -767,6 +813,7 @@ module Make promoted_contents = 0; promoted_nodes = 0; promoted_commits = 0; + upgraded_nodes = 0; width = 0; depth = 0; } @@ -776,6 +823,7 @@ module Make let incr_contents s = s.promoted_contents <- s.promoted_contents + 1 let incr_nodes s = s.promoted_nodes <- s.promoted_nodes + 1 + let incr_upgraded_nodes s = s.upgraded_nodes <- s.upgraded_nodes + 1 let incr_commits s = s.promoted_commits <- s.promoted_commits + 1 let update_width s c = s.width <- max s.width (List.length c) let update_depth s n = s.depth <- max s.depth n @@ -818,9 +866,23 @@ module Make let promote_val t k v = Raw.add_cstruct t.new_db k v + let is_node k = String.length k > 4 && String.sub k 0 4 = "node" + + let upgrade_node t v = match P.XNode.version v with + | `V2 -> `Cstruct v + | `V1 -> + incr_upgraded_nodes t.stats; + match P.XNode.to_value v with + | Ok v -> P.XNode.of_value v + | Error (`Msg e) -> + Fmt.failwith "Cannot upgrade node %S: %s\n%!" + (Cstruct.to_string v) e + let promote msg t k = Raw.find t.old_db k (fun x -> Ok x) >>= function - | Some v -> promote_val t k v + | Some v -> + if is_node k then Raw.add t.new_db k( upgrade_node t v) + else promote_val t k v | None -> let k = H.of_raw (Cstruct.of_string k) in Fmt.failwith "promote %s: cannot promote key %a\n%!" msg H.pp k diff --git a/vendors/irmin-lmdb/irmin_lmdb.mli b/vendors/irmin-lmdb/irmin_lmdb.mli index 64511f00f980..697c2d323f8d 100644 --- a/vendors/irmin-lmdb/irmin_lmdb.mli +++ b/vendors/irmin-lmdb/irmin_lmdb.mli @@ -39,10 +39,6 @@ module Make type stats - val promoted_contents: stats -> int - val promoted_nodes: stats -> int - val promoted_commits: stats -> int - val pp_stats: stats Fmt.t val gc: diff --git a/vendors/irmin-lmdb/irmin_v2_type.ml b/vendors/irmin-lmdb/irmin_v2_type.ml new file mode 100644 index 000000000000..937189c49932 --- /dev/null +++ b/vendors/irmin-lmdb/irmin_v2_type.ml @@ -0,0 +1,1405 @@ +(* + * Copyright (c) 2016-2017 Thomas Gazagnaire + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +type (_, _) eq = Refl: ('a, 'a) eq + +module Witness : sig + type 'a t + val make : unit -> 'a t + val eq : 'a t -> 'b t -> ('a, 'b) eq option +end = struct + + type _ equality = .. + + module type Inst = sig + type t + type _ equality += Eq : t equality + end + + type 'a t = (module Inst with type t = 'a) + + let make: type a. unit -> a t = fun () -> + let module Inst = struct + type t = a + type _ equality += Eq : t equality + end + in + (module Inst) + + let eq: type a b. a t -> b t -> (a, b) eq option = + fun (module A) (module B) -> + match A.Eq with + | B.Eq -> Some Refl + | _ -> None + +end + + +module Json = struct + + type decoder = { + mutable lexemes: Jsonm.lexeme list; + d: Jsonm.decoder; + } + + let decoder ?encoding src = { lexemes = []; d = Jsonm.decoder ?encoding src } + let decoder_of_lexemes lexemes = { lexemes; d = Jsonm.decoder (`String "") } + let rewind e l = e.lexemes <- l :: e.lexemes + + let decode e = + match e.lexemes with + | h::t -> e.lexemes <- t; `Lexeme h + | [] -> Jsonm.decode e.d + +end + +type len = [`Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int] + +type 'a pp = 'a Fmt.t +type 'a of_string = string -> ('a, [`Msg of string]) result +type 'a to_string = 'a -> string +type 'a encode_json = Jsonm.encoder -> 'a -> unit +type 'a decode_json = Json.decoder -> ('a, [`Msg of string]) result +type 'a encode_bin = bytes -> int -> 'a -> int +type 'a decode_bin = string -> int -> int * 'a +type 'a size_of = 'a -> [ `Size of int | `Buffer of string ] +type 'a compare = 'a -> 'a -> int +type 'a equal = 'a -> 'a -> bool +type 'a hash = 'a -> int + +type 'a t = + | Self : 'a self -> 'a t + | Like : ('a, 'b) like -> 'b t + | Prim : 'a prim -> 'a t + | List : 'a len_v -> 'a list t + | Array : 'a len_v -> 'a array t + | Tuple : 'a tuple -> 'a t + | Option : 'a t -> 'a option t + | Record : 'a record -> 'a t + | Variant: 'a variant -> 'a t + +and 'a len_v = { + len: len; + v : 'a t; +} + +and ('a, 'b) like = { + x : 'a t; + pp : 'b pp option; + of_string : 'b of_string option; + encode_json : 'b encode_json option; + decode_json : 'b decode_json option; + encode_bin : 'b encode_bin option; + decode_bin : 'b decode_bin option; + hash : 'b hash option; + size_of : 'b size_of option; + compare : 'b compare option; + equal : 'b equal option; + f : ('a -> 'b); + g : ('b -> 'a); + lwit : 'b Witness.t; +} + +and 'a self = { + mutable self: 'a t; +} + +and 'a prim = + | Unit : unit prim + | Bool : bool prim + | Char : char prim + | Int : int prim + | Int32 : int32 prim + | Int64 : int64 prim + | Float : float prim + | String : len -> string prim + | Bytes : len -> bytes prim + +and 'a tuple = + | Pair : 'a t * 'b t -> ('a * 'b) tuple + | Triple : 'a t * 'b t * 'c t -> ('a * 'b * 'c) tuple + +and 'a record = { + rwit : 'a Witness.t; + rname : string; + rfields: 'a fields_and_constr; +} + +and 'a fields_and_constr = + | Fields: ('a, 'b) fields * 'b -> 'a fields_and_constr + +and ('a, 'b) fields = + | F0: ('a, 'a) fields + | F1: ('a, 'b) field * ('a, 'c) fields -> ('a, 'b -> 'c) fields + +and ('a, 'b) field = { + fname: string; + ftype: 'b t; + fget : 'a -> 'b; +} + +and 'a variant = { + vwit : 'a Witness.t; + vname : string; + vcases: 'a a_case array; + vget : 'a -> 'a case_v; +} + +and 'a a_case = + | C0: 'a case0 -> 'a a_case + | C1: ('a, 'b) case1 -> 'a a_case + +and 'a case_v = + | CV0: 'a case0 -> 'a case_v + | CV1: ('a, 'b) case1 * 'b -> 'a case_v + +and 'a case0 = { + ctag0 : int; + cname0: string; + c0 : 'a; +} + +and ('a, 'b) case1 = { + ctag1 : int; + cname1: string; + ctype1: 'b t; + c1 : 'b -> 'a; +} + +type _ a_field = Field: ('a, 'b) field -> 'a a_field + +module Refl = struct + + let prim: type a b. a prim -> b prim -> (a, b) eq option = fun a b -> + match a, b with + | Unit , Unit -> Some Refl + | Bool , Bool -> Some Refl + | Char , Char -> Some Refl + | Int , Int -> Some Refl + | Int32 , Int32 -> Some Refl + | Int64 , Int64 -> Some Refl + | Float , Float -> Some Refl + | String _ , String _ -> Some Refl + | Bytes _ , Bytes _ -> Some Refl + | _ -> None + + let rec t: type a b. a t -> b t -> (a, b) eq option = fun a b -> + match a, b with + | Self a, _ -> t a.self b + | _, Self b -> t a b.self + | Like a, Like b -> Witness.eq a.lwit b.lwit + | Prim a, Prim b -> prim a b + | Array a, Array b -> + (match t a.v b.v with Some Refl -> Some Refl | None -> None) + | List a, List b -> + (match t a.v b.v with Some Refl -> Some Refl | None -> None) + | Tuple a, Tuple b -> tuple a b + | Option a, Option b -> + (match t a b with Some Refl -> Some Refl | None -> None) + | Record a, Record b -> Witness.eq a.rwit b.rwit + | Variant a, Variant b -> Witness.eq a.vwit b.vwit + | _ -> None + + and tuple: type a b. a tuple -> b tuple -> (a, b) eq option = fun a b -> + match a, b with + | Pair (a0, a1), Pair (b0, b1) -> + (match t a0 b0, t a1 b1 with + | Some Refl, Some Refl -> Some Refl + | _ -> None) + | Triple (a0, a1, a2), Triple (b0, b1, b2) -> + (match t a0 b0, t a1 b1, t a2 b2 with + | Some Refl, Some Refl, Some Refl -> Some Refl + | _ -> None) + | _ -> None + +end + +let unit = Prim Unit +let bool = Prim Bool +let char = Prim Char +let int = Prim Int +let int32 = Prim Int32 +let int64 = Prim Int64 +let float = Prim Float +let string = Prim (String `Int64) +let bytes = Prim (Bytes `Int64) +let string_of n = Prim (String n) +let bytes_of n = Prim (Bytes n) + +let list ?(len=`Int64) v = List { v; len } +let array ?(len=`Int64) v = Array { v; len } +let pair a b = Tuple (Pair (a, b)) +let triple a b c = Tuple (Triple (a, b, c)) +let option a = Option a + +let split2 = function + | Some (x, y) -> Some x, Some y + | None -> None , None + +let split3 = function + | Some (x, y, z) -> Some x, Some y, Some z + | None -> None , None , None + +let like (type a b) (x: a t) ?cli ?json ?bin ?equal ?compare ?hash + (f: a -> b) (g: b -> a) = + let pp, of_string = split2 cli in + let encode_json, decode_json = split2 json in + let encode_bin, decode_bin, size_of = split3 bin in + Like { x = x; f; g; lwit = Witness.make (); + pp; of_string; + encode_json; decode_json; + encode_bin; decode_bin; size_of; + compare; equal; hash } + +let like' ?cli ?json ?bin ?equal ?compare ?hash t = + like ?cli ?json ?bin ?equal ?compare ?hash t (fun x -> x) (fun x -> x) + +(* fix points *) + +let mu: type a. (a t -> a t) -> a t = fun f -> + let rec fake_x = { self = Self fake_x } in + let real_x = f (Self fake_x) in + fake_x.self <- real_x; + real_x + +let mu2: type a b. (a t -> b t -> a t * b t) -> a t * b t = fun f -> + let rec fake_x = { self = Self fake_x } in + let rec fake_y = { self = Self fake_y } in + let real_x, real_y = f (Self fake_x) (Self fake_y) in + fake_x.self <- real_x; + fake_y.self <- real_y; + real_x, real_y + +(* records *) + +type ('a, 'b, 'c) open_record = + ('a, 'c) fields -> string * 'b * ('a, 'b) fields + +let field fname ftype fget = { fname; ftype; fget } + +let record: string -> 'b -> ('a, 'b, 'b) open_record = + fun n c fs -> n, c, fs + +let app: type a b c d. + (a, b, c -> d) open_record -> (a, c) field -> (a, b, d) open_record + = fun r f fs -> + let n, c, fs = r (F1 (f, fs)) in + n, c, fs + +let sealr: type a b. (a, b, a) open_record -> a t = + fun r -> + let rname, c, fs = r F0 in + let rwit = Witness.make () in + Record { rwit; rname; rfields = Fields (fs, c) } + +let (|+) = app + +(* variants *) + +type 'a case_p = 'a case_v + +type ('a, 'b) case = int -> ('a a_case * 'b) + +let case0 cname0 c0 ctag0 = + let c = { ctag0; cname0; c0 } in + C0 c, CV0 c + +let case1 cname1 ctype1 c1 ctag1 = + let c = { ctag1; cname1; ctype1; c1 } in + C1 c, fun v -> CV1 (c, v) + +type ('a, 'b, 'c) open_variant = 'a a_case list -> string * 'c * 'a a_case list + +let variant n c vs = n, c, vs + +let app v c cs = + let n, fc, cs = v cs in + let c, f = c (List.length cs) in + n, fc f, (c :: cs) + +let sealv v = + let vname, vget, vcases = v [] in + let vwit = Witness.make () in + let vcases = Array.of_list (List.rev vcases) in + Variant { vwit; vname; vcases ; vget } + +let (|~) = app + +let enum vname l = + let vwit = Witness.make () in + let _, vcases, mk = + List.fold_left (fun (ctag0, cases, mk) (n, v) -> + let c = { ctag0; cname0 = n; c0 = v } in + ctag0+1, (C0 c :: cases), (v, CV0 c) :: mk + ) (0, [], []) l + in + let vcases = Array.of_list (List.rev vcases) in + Variant { vwit; vname; vcases; vget = fun x -> List.assq x mk } + +let rec fields_aux: type a b. (a, b) fields -> a a_field list = function + | F0 -> [] + | F1 (h, t) -> Field h :: fields_aux t + +let fields r = match r.rfields with + | Fields (f, _) -> fields_aux f + +let result a b = + variant "result" (fun ok error -> function + | Ok x -> ok x + | Error x -> error x) + |~ case1 "ok" a (fun a -> Ok a) + |~ case1 "error" b (fun b -> Error b) + |> sealv + +module Equal = struct + + let unit _ _ = true + let bool (x:bool) (y:bool) = x = y + let char (x:char) (y:char) = x = y + let int (x:int) (y:int) = x = y + let int32 (x:int32) (y:int32) = x = y + let int64 (x:int64) (y:int64) = x = y + let string x y = x == y || String.equal x y + let bytes x y = x == y || Bytes.equal x y + + (* NOTE: equality is ill-defined on float *) + let float (x:float) (y:float) = x = y + + let list e x y = + x == y || (List.length x = List.length y && List.for_all2 e x y) + + let array e x y = + x == y || + (Array.length x = Array.length y && + let rec aux = function + | -1 -> true + | i -> e x.(i) y.(i) && aux (i-1) + in aux (Array.length x - 1)) + + let pair ex ey (x1, y1 as a) (x2, y2 as b) = + a == b || (ex x1 x2 && ey y1 y2) + + let triple ex ey ez (x1, y1, z1 as a) (x2, y2, z2 as b) = + a == b || (ex x1 x2 && ey y1 y2 && ez z1 z2) + + let option e x y = + x == y || + match x, y with + | None , None -> true + | Some x, Some y -> e x y + | _ -> false + + let rec t: type a. a t -> a equal = function + | Self s -> t s.self + | Like b -> like b + | Prim p -> prim p + | List l -> list (t l.v) + | Array a -> array (t a.v) + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a equal = function + | Pair (a, b) -> pair (t a) (t b) + | Triple (a, b, c) -> triple (t a) (t b) (t c) + + and like: type a b. (a, b) like -> b equal = + fun { x; g; equal; compare; _ } u v -> + match equal with + | Some f -> f u v + | None -> match compare with + | Some f -> f u v = 0 + | None -> t x (g u) (g v) + + and prim: type a. a prim -> a equal = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String _ -> string + | Bytes _ -> bytes + + and record: type a. a record -> a equal = fun r x y -> + List.for_all (function Field f -> field f x y) (fields r) + + and field: type a b. (a, b) field -> a equal = fun f x y -> + t f.ftype (f.fget x) (f.fget y) + + and variant: type a. a variant -> a equal = fun v x y -> + case_v (v.vget x) (v.vget y) + + and case_v: type a. a case_v equal = fun x y -> + match x, y with + | CV0 x , CV0 y -> int x.ctag0 y.ctag0 + | CV1 (x, vx), CV1 (y, vy) -> int x.ctag1 y.ctag1 && + eq (x.ctype1, vx) (y.ctype1, vy) + | _ -> false + + and eq: type a b. (a t * a) -> (b t * b) -> bool = fun (tx, x) (ty, y) -> + match Refl.t tx ty with + | Some Refl -> t tx x y + | None -> assert false (* this should never happen *) + +end + +let equal = Equal.t + +module Compare = struct + + let unit (_:unit) (_:unit) = 0 + let bool (x:bool) (y:bool) = Pervasives.compare x y + let char = Char.compare + let int (x:int) (y:int) = Pervasives.compare x y + let int32 = Int32.compare + let int64 = Int64.compare + let float (x:float) (y:float) = Pervasives.compare x y + let string x y = if x == y then 0 else String.compare x y + let bytes x y = if x == y then 0 else Bytes.compare x y + + let list c x y = + if x == y then 0 else + let rec aux x y = match x, y with + | [], [] -> 0 + | [], _ -> -1 + | _ , [] -> 1 + | xx::x,yy::y -> match c xx yy with + | 0 -> aux x y + | i -> i + in + aux x y + + let array c x y = + if x == y then 0 else + let lenx = Array.length x in + let leny = Array.length y in + if lenx > leny then 1 + else if lenx < leny then -1 + else + let rec aux i = match c x.(i) y.(i) with + | 0 when i+1 = lenx -> 0 + | 0 -> aux (i+1) + | i -> i + in + aux 0 + + let pair cx cy (x1, y1 as a) (x2, y2 as b) = + if a == b then 0 else + match cx x1 x2 with + | 0 -> cy y1 y2 + | i -> i + + let triple cx cy cz (x1, y1, z1 as a) (x2, y2, z2 as b) = + if a == b then 0 else + match cx x1 x2 with + | 0 -> pair cy cz (y1, z1) (y2, z2) + | i -> i + + let option c x y = + if x == y then 0 else + match x, y with + | None , None -> 0 + | Some _, None -> 1 + | None , Some _ -> -1 + | Some x, Some y -> c x y + + let rec t: type a. a t -> a compare = function + | Self s -> t s.self + | Like b -> like b + | Prim p -> prim p + | List l -> list (t l.v) + | Array a -> array (t a.v) + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a compare = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b compare = + fun { x; g; compare; _ } u v -> + match compare with + | Some f -> f u v + | None -> t x (g u) (g v) + + and prim: type a. a prim -> a compare = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String _ -> string + | Bytes _ -> bytes + + and record: type a. a record -> a compare = fun r x y -> + let rec aux = function + | [] -> 0 + | Field f :: t -> match field f x y with 0 -> aux t | i -> i + in + aux (fields r) + + and field: type a b. (a, b) field -> a compare = fun f x y -> + t f.ftype (f.fget x) (f.fget y) + + and variant: type a. a variant -> a compare = fun v x y -> + case_v (v.vget x) (v.vget y) + + and case_v: type a. a case_v compare = fun x y -> + match x, y with + | CV0 x , CV0 y -> int x.ctag0 y.ctag0 + | CV0 x , CV1 (y, _) -> int x.ctag0 y.ctag1 + | CV1 (x, _) , CV0 y -> int x.ctag1 y.ctag0 + | CV1 (x, vx), CV1 (y, vy) -> + match int x.ctag1 y.ctag1 with + | 0 -> compare (x.ctype1, vx) (y.ctype1, vy) + | i -> i + + and compare: type a b. (a t * a) -> (b t * b) -> int = fun (tx, x) (ty, y) -> + match Refl.t tx ty with + | Some Refl -> t tx x y + | None -> assert false (* this should never happen *) + +end + +let compare = Compare.t + +module Encode_json = struct + + let lexeme e l = ignore (Jsonm.encode e (`Lexeme l)) + + let unit e () = lexeme e `Null + + (* what about escaping? *) + let string e s = lexeme e (`String s) + let bytes e s = lexeme e (`String (Bytes.unsafe_to_string s)) + let char e c = string e (String.make 1 c) + let float e f = lexeme e (`Float f) + let int e i = float e (float_of_int i) + let int32 e i = float e (Int32.to_float i) + let int64 e i = float e (Int64.to_float i) + let bool e = function false -> float e 0. | _ -> float e 1. + + let list l e x = + lexeme e `As; + List.iter (l e) x; + lexeme e `Ae + + let array l e x = + lexeme e `As; + Array.iter (l e) x; + lexeme e `Ae + + let pair a b e (x, y) = + lexeme e `As; + a e x; + b e y; + lexeme e `Ae + + let triple a b c e (x, y, z) = + lexeme e `As; + a e x; + b e y; + c e z; + lexeme e `Ae + + let option o e = function + | None -> lexeme e `Null + | Some x -> o e x + + let rec t: type a. a t -> a encode_json = function + | Self s -> t s.self + | Like b -> like b + | Prim t -> prim t + | List l -> list (t l.v) + | Array a -> array (t a.v) + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a encode_json = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b encode_json = + fun { x; g; encode_json; pp; _ } e u -> + match encode_json with + | Some f -> f e u + | None -> + let string = Prim (String `Int64) in + match x, pp with + | Prim _, Some pp -> t string e (Fmt.to_to_string pp u) + | _ -> t x e (g u) + + and prim: type a. a prim -> a encode_json = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String _ -> string + | Bytes _ -> bytes + + and record: type a. a record -> a encode_json = fun r e x -> + let fields = fields r in + lexeme e `Os; + List.iter (fun (Field f) -> + match f.ftype, f.fget x with + | Option _, None -> () + | Option o, Some x -> lexeme e (`Name f.fname); t o e x + | tx , x -> lexeme e (`Name f.fname); t tx e x + ) fields; + lexeme e `Oe + + and variant: type a. a variant -> a encode_json = fun v e x -> + case_v e (v.vget x) + + and case_v: type a. a case_v encode_json = fun e c -> + match c with + | CV0 c -> string e c.cname0 + | CV1 (c,v) -> + lexeme e `Os; + lexeme e (`Name c.cname1); + t c.ctype1 e v; + lexeme e `Oe + +end + +let encode_json = Encode_json.t + +let pp_json ?minify t ppf x = + let buf = Buffer.create 42 in + let e = Jsonm.encoder ?minify (`Buffer buf) in + encode_json t e x; + ignore (Jsonm.encode e `End); + Fmt.string ppf (Buffer.contents buf) + +let to_json_string ?minify t = Fmt.to_to_string (pp_json ?minify t) + +module Decode_json = struct + + let lexeme e = match Json.decode e with + | `Lexeme e -> Ok e + | `Error e -> Error (`Msg (Fmt.to_to_string Jsonm.pp_error e)) + | `End | `Await -> assert false + + let (>>=) l f = match l with + | Error _ as e -> e + | Ok l -> f l + + let (>|=) l f = match l with + | Ok l -> Ok (f l) + | Error _ as e -> e + + let join = function + | Error _ as e -> e + | Ok x -> x + + let error e got expected = + let _, (l, c) = Jsonm.decoded_range e.Json.d in + Error (`Msg (Fmt.strf + "line %d, character %d:\nFound lexeme %a, but \ + lexeme %s was expected" l c Jsonm.pp_lexeme got expected)) + + let expect_lexeme e expected = + lexeme e >>= fun got -> + if expected = got then Ok () + else error e got (Fmt.to_to_string Jsonm.pp_lexeme expected) + + (* read all lexemes until the end of the next well-formed value *) + let value e = + let lexemes = ref [] in + let objs = ref 0 in + let arrs = ref 0 in + let rec aux () = + lexeme e >>= fun l -> + lexemes := l :: !lexemes; + let () = match l with + | `Os -> incr objs + | `As -> incr arrs + | `Oe -> decr objs + | `Ae -> decr arrs + | `Name _ + | `Null + | `Bool _ + | `String _ + | `Float _ -> () + in + if !objs > 0 || !arrs > 0 then aux () + else Ok () + in + aux () >|= fun () -> + List.rev !lexemes + + let unit e = expect_lexeme e `Null + + let string e = + lexeme e >>= function + | `String s -> Ok s + | l -> error e l "`String" + + let bytes e = + lexeme e >>= function + | `String s -> Ok (Bytes.unsafe_of_string s) + | l -> error e l "`String" + + let float e = + lexeme e >>= function + | `Float f -> Ok f + | l -> error e l "`Float" + + let char e = + lexeme e >>= function + | `String s when String.length s = 1 -> Ok (String.get s 1) + | l -> error e l "`String[1]" + + let int32 e = float e >|= Int32.of_float + let int64 e = float e >|= Int64.of_float + let int e = float e >|= int_of_float + let bool e = int e >|= function 0 -> false | _ -> true + + let list l e = + expect_lexeme e `As >>= fun () -> + let rec aux acc = + lexeme e >>= function + | `Ae -> Ok (List.rev acc) + | lex -> + Json.rewind e lex; + l e >>= fun v -> + aux (v :: acc) + in + aux [] + + let array l e = list l e >|= Array.of_list + + let pair a b e = + expect_lexeme e `As >>= fun () -> + a e >>= fun x -> + b e >>= fun y -> + expect_lexeme e `Ae >|= fun () -> + x, y + + let triple a b c e = + expect_lexeme e `As >>= fun () -> + a e >>= fun x -> + b e >>= fun y -> + c e >>= fun z -> + expect_lexeme e `Ae >|= fun () -> + x, y, z + + let option o e = + lexeme e >>= function + | `Null -> Ok None + | lex -> + Json.rewind e lex; + o e >|= fun v -> Some v + + let rec t: type a. a t -> a decode_json = function + | Self s -> t s.self + | Like b -> like b + | Prim t -> prim t + | List l -> list (t l.v) + | Array a -> array (t a.v) + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a decode_json = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b decode_json = + fun { x; f; decode_json; of_string; _ } e -> + match decode_json with + | Some d -> d e + | None -> + let string = Prim (String `Int64) in + match x, of_string with + | Prim _, Some x -> t string e >|= x |> join + | _ -> t x e >|= f + + and prim: type a. a prim -> a decode_json = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String _ -> string + | Bytes _ -> bytes + + and record: type a. a record -> a decode_json = fun r e -> + expect_lexeme e `Os >>= fun () -> + let rec soup acc = + lexeme e >>= function + | `Name n -> + value e >>= fun s -> + soup ((n, s) :: acc) + | `Oe -> Ok acc + | l -> error e l "`Record-contents" + in + soup [] >>= fun soup -> + let rec aux: type a b. (a, b) fields -> b -> (a, [`Msg of string]) result = + fun f c -> match f with + | F0 -> Ok c + | F1 (h, f) -> + let v = + try + let s = List.assoc h.fname soup in + let e = Json.decoder_of_lexemes s in + t h.ftype e + with Not_found -> + match h.ftype with + | Option _ -> Ok None + | _ -> + Error (`Msg (Fmt.strf "missing value for %s.%s" r.rname h.fname)) + in + match v with + | Ok v -> aux f (c v) + | Error _ as e -> e + in + let Fields (f, c) = r.rfields in + aux f c + + and variant: type a. a variant -> a decode_json = fun v e -> + lexeme e >>= function + | `String s -> case0 s v e + | `Os -> case1 v e + | l -> error e l "(`String | `Os)" + + and case0: type a. string -> a variant -> a decode_json = fun s v _e -> + let rec aux i = match v.vcases.(i) with + | C0 c when String.compare c.cname0 s = 0 -> Ok c.c0 + | _ -> + if i < Array.length v.vcases + then aux (i+1) + else Error (`Msg "variant") + in + aux 0 + + and case1: type a. a variant -> a decode_json = fun v e -> + lexeme e >>= function + | `Name s -> + let rec aux i = match v.vcases.(i) with + | C1 c when String.compare c.cname1 s = 0 -> t c.ctype1 e >|= c.c1 + | _ -> + if i < Array.length v.vcases + then aux (i+1) + else Error (`Msg "variant") + in + aux 0 >>= fun c -> + expect_lexeme e `Oe >|= fun () -> + c + | l -> error e l "`Name" + +end + +let decode_json x d = Decode_json.(t x @@ { Json.d; lexemes = [] }) +let decode_json_lexemes x ls = Decode_json.(t x @@ Json.decoder_of_lexemes ls) +let of_json_string x s = Decode_json.(t x @@ Json.decoder (`String s)) + +module Size_of = struct + + let len = function + | `Int8 -> 1 + | `Int16 -> 2 + | `Int32 -> 4 + | `Int64 -> 8 + | `Fixed _ -> 0 + + let size = function + | `Size s -> s + | `Buffer b -> String.length b + + let unit () = `Size 0 + let char (_:char) = `Size 1 + let int32 (_:int32) = `Size 4 + let int64 (_:int64) = `Size 8 + let int (_:int) = `Size 8 (* always use 64 bits for storing ints *) + let bool (_:bool) = `Size 1 + let float (_:float) = `Size 8 (* NOTE: we consider 'double' here *) + let string n s = `Size (len n + String.length s) + let bytes n s = `Size (len n + Bytes.length s) + + let list l n x = + `Size (List.fold_left (fun acc x -> acc + size (l x)) (len n) x) + + let array l n x = + `Size (Array.fold_left (fun acc x -> acc + size (l x)) (len n) x) + + let pair a b (x, y) = `Size (size (a x) + size (b y)) + let triple a b c (x, y, z) = `Size (size (a x) + size (b y) + size (c z)) + let option o = function + | None -> char '\000' + | Some x -> `Size (size (char '\000') + size (o x)) + + let rec t: type a. a t -> a size_of = function + | Self s -> t s.self + | Like b -> like b + | Prim t -> prim t + | List l -> list (t l.v) l.len + | Array a -> array (t a.v) a.len + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a size_of = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b size_of = + fun { x; g; size_of; _ } u -> + match size_of with + | None -> t x (g u) + | Some f -> f u + + and prim: type a. a prim -> a size_of = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String n -> string n + | Bytes n -> bytes n + + and record: type a. a record -> a size_of = fun r x -> + let fields = fields r in + let s = + List.fold_left (fun acc (Field f) -> acc + size (field f x)) 0 fields + in + `Size s + + and field: type a b. (a, b) field -> a size_of = fun f x -> + t f.ftype (f.fget x) + + and variant: type a. a variant -> a size_of = fun v x -> + match v.vget x with + | CV0 _ -> char '\000' + | CV1 (x, vx) -> `Size (size (char '\000') + size (t x.ctype1 vx)) + +end + +let size_of t x = + let rec aux: type a. a t -> a size_of = fun t x -> match t with + | Like l when l.size_of = None -> aux l.x (l.g x) + | Self s -> aux s.self x + | Prim (String _) -> `Size (String.length x) + | Prim (Bytes _) -> `Size (Bytes.length x) + | _ -> Size_of.t t x + in + aux t x + +module B = struct + + external get_16 : string -> int -> int = "%caml_string_get16" + external get_32 : string -> int -> int32 = "%caml_string_get32" + external get_64 : string -> int -> int64 = "%caml_string_get64" + + external set_16 : Bytes.t -> int -> int -> unit = "%caml_string_set16" + external set_32 : Bytes.t -> int -> int32 -> unit = "%caml_string_set32" + external set_64 : Bytes.t -> int -> int64 -> unit = "%caml_string_set64" + + external swap16 : int -> int = "%bswap16" + external swap32 : int32 -> int32 = "%bswap_int32" + external swap64 : int64 -> int64 = "%bswap_int64" + + let get_uint16 s off = + if not Sys.big_endian then swap16 (get_16 s off) else get_16 s off + + let get_uint32 s off = + if not Sys.big_endian then swap32 (get_32 s off) else get_32 s off + + let get_uint64 s off = + if not Sys.big_endian then swap64 (get_64 s off) else get_64 s off + + let set_uint16 s off v = + if not Sys.big_endian then set_16 s off (swap16 v) else set_16 s off v + + let set_uint32 s off v = + if not Sys.big_endian then set_32 s off (swap32 v) else set_32 s off v + + let set_uint64 s off v = + if not Sys.big_endian then set_64 s off (swap64 v) else set_64 s off v + + let set_char = Bytes.set + let get_char = String.get + let blit_from_bytes = Bytes.blit + let blit_from_string = Bytes.blit_string + let blit_to_bytes = String.blit +end + +module Encode_bin = struct + + let unit _buf ofs () = ofs + let char buf ofs c = B.set_char buf ofs c ; ofs + 1 + let int8 buf ofs i = char buf ofs (Char.chr i) + let int16 buf ofs i = B.set_uint16 buf ofs i ; ofs + 2 + let int32 buf ofs i = B.set_uint32 buf ofs i ; ofs + 4 + let int64 buf ofs i = B.set_uint64 buf ofs i ; ofs + 8 + let float buf ofs f = int64 buf ofs (Int64.bits_of_float f) + let int buf ofs i = int64 buf ofs (Int64.of_int i) + let bool buf ofs b = char buf ofs (if b then '\255' else '\000') + + let len n buf ofs i = match n with + | `Int8 -> int8 buf ofs i + | `Int16 -> int16 buf ofs i + | `Int32 -> int32 buf ofs (Int32.of_int i) + | `Int64 -> int64 buf ofs (Int64.of_int i) + | `Fixed _ -> ofs + + let string n buf ofs s = + let k = String.length s in + let ofs = len n buf ofs k in + B.blit_from_string s 0 buf ofs k ; + ofs + k + + let bytes n buf ofs s = + let k = Bytes.length s in + let ofs = len n buf ofs k in + B.blit_from_bytes s 0 buf ofs k ; + ofs + k + + let list l n buf ofs x = + let ofs = len n buf ofs (List.length x) in + List.fold_left (fun ofs e -> l buf ofs e) ofs x + + let array l n buf ofs x = + let ofs = len n buf ofs (Array.length x) in + Array.fold_left (fun ofs e -> l buf ofs e) ofs x + + let pair a b buf ofs (x, y) = + let ofs = a buf ofs x in + b buf ofs y + + let triple a b c buf ofs (x, y, z) = + let ofs = a buf ofs x in + let ofs = b buf ofs y in + c buf ofs z + + let option o buf ofs = function + | None -> + char buf ofs '\000' + | Some x -> + let ofs = char buf ofs '\255' in + o buf ofs x + + let rec t: type a. a t -> a encode_bin = function + | Self s -> t s.self + | Like b -> like b + | Prim t -> prim t + | List l -> list (t l.v) l.len + | Array a -> array (t a.v) a.len + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a encode_bin = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b encode_bin = + fun { x; g; encode_bin; _ } buf ofs u -> + match encode_bin with + | None -> t x buf ofs (g u) + | Some f -> f buf ofs u + + and prim: type a. a prim -> a encode_bin = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String n -> string n + | Bytes n -> bytes n + + and record: type a. a record -> a encode_bin = fun r buf ofs x -> + let fields = fields r in + List.fold_left (fun ofs (Field f) -> + t f.ftype buf ofs (f.fget x) + ) ofs fields + + and variant: type a. a variant -> a encode_bin = fun v buf ofs x -> + case_v buf ofs (v.vget x) + + and case_v: type a. a case_v encode_bin = fun buf ofs c -> + match c with + | CV0 c -> char buf ofs (char_of_int c.ctag0) + | CV1 (c, v) -> + let ofs = char buf ofs (char_of_int c.ctag1) in + t c.ctype1 buf ofs v + +end + +let err_invalid_bounds = + Fmt.invalid_arg "Irmin.Type.%s: invalid bounds; expecting %d, got %d" + +let encode_bin_bytes ?buf t x = + let rec aux: type a. a t -> a -> bytes = fun t x -> match t with + | Like l when l.encode_bin = None -> aux l.x (l.g x) + | Self s -> aux s.self x + | Prim (String _) -> Bytes.of_string x + | Prim (Bytes _) -> x + | _ -> + match size_of t x with + | `Buffer b -> Bytes.unsafe_of_string b + | `Size len -> + let exact, buf = match buf with + | None -> true, Bytes.create len + | Some buf -> + if len > Bytes.length buf then + err_invalid_bounds "Type.encode_bytes" len (Bytes.length buf) + else + false, buf + in + let len' = Encode_bin.t t buf 0 x in + if exact then assert (len = len'); + buf + in + aux t x + +let encode_bin ?buf t x = + let rec aux: type a. a t -> a -> string = fun t x -> match t with + | Like l when l.encode_bin = None -> aux l.x (l.g x) + | Self s -> aux s.self x + | Prim (String _) -> x + | Prim (Bytes _) -> Bytes.to_string x + | _ -> Bytes.unsafe_to_string (encode_bin_bytes ?buf t x) + in + aux t x + +module Decode_bin = struct + + let (>|=) (ofs, x) f = ofs, f x + let (>>=) (ofs, x) f = f (ofs, x) + let ok ofs x = (ofs, x) + + type 'a res = int * 'a + + let unit _ ofs = ok ofs () + let char buf ofs = ok (ofs+1) (B.get_char buf ofs) + let int8 buf ofs = char buf ofs >|= Char.code + let int16 buf ofs = ok (ofs+2) (B.get_uint16 buf ofs) + let int32 buf ofs = ok (ofs+4) (B.get_uint32 buf ofs) + let int64 buf ofs = ok (ofs+8) (B.get_uint64 buf ofs) + let bool buf ofs = char buf ofs >|= function '\000' -> false | _ -> true + let int buf ofs = int64 buf ofs >|= Int64.to_int + let float buf ofs = int64 buf ofs >|= Int64.float_of_bits + + let len buf ofs = function + | `Int8 -> int8 buf ofs + | `Int16 -> int16 buf ofs + | `Int32 -> int32 buf ofs >|= Int32.to_int + | `Int64 -> int64 buf ofs >|= Int64.to_int + | `Fixed n -> ok ofs n + + let string n buf ofs = + len buf ofs n >>= fun (ofs, len) -> + let str = Bytes.create len in + B.blit_to_bytes buf ofs str 0 len ; + ok (ofs+len) (Bytes.unsafe_to_string str) + + let bytes n buf ofs = + len buf ofs n >>= fun (ofs, len) -> + let str = Bytes.create len in + B.blit_to_bytes buf ofs str 0 len ; + ok (ofs+len) str + + let list l n buf ofs = + len buf ofs n >>= fun (ofs, len) -> + let rec aux acc ofs = function + | 0 -> ok ofs (List.rev acc) + | n -> + l buf ofs >>= fun (ofs, x) -> + aux (x :: acc) ofs (n - 1) + in + aux [] ofs len + + let array l len buf ofs = list l len buf ofs >|= Array.of_list + + let pair a b buf ofs = + a buf ofs >>= fun (ofs, a) -> + b buf ofs >|= fun b -> + (a, b) + + let triple a b c buf ofs = + a buf ofs >>= fun (ofs, a) -> + b buf ofs >>= fun (ofs, b) -> + c buf ofs >|= fun c -> + (a, b, c) + + let option: type a. a decode_bin -> a option decode_bin = fun o buf ofs -> + char buf ofs >>= function + | ofs, '\000' -> ok ofs None + | ofs, _ -> o buf ofs >|= fun x -> Some x + + let rec t: type a. a t -> a decode_bin = function + | Self s -> t s.self + | Like b -> like b + | Prim t -> prim t + | List l -> list (t l.v) l.len + | Array a -> array (t a.v) a.len + | Tuple t -> tuple t + | Option x -> option (t x) + | Record r -> record r + | Variant v -> variant v + + and tuple: type a. a tuple -> a decode_bin = function + | Pair (x,y) -> pair (t x) (t y) + | Triple (x,y,z) -> triple (t x) (t y) (t z) + + and like: type a b. (a, b) like -> b decode_bin = + fun { x; f; decode_bin; _ } buf ofs -> + match decode_bin with + | None -> t x buf ofs >|= f + | Some r -> r buf ofs + + and prim: type a. a prim -> a decode_bin = function + | Unit -> unit + | Bool -> bool + | Char -> char + | Int -> int + | Int32 -> int32 + | Int64 -> int64 + | Float -> float + | String n -> string n + | Bytes n -> bytes n + + and record: type a. a record -> a decode_bin = fun r buf ofs -> + match r.rfields with + | Fields (fs, c) -> + let rec aux: type b. int -> b -> (a, b) fields -> a res + = fun ofs f -> function + | F0 -> ok ofs f + | F1 (h, t) -> + field h buf ofs >>= fun (ofs, x) -> + aux ofs (f x) t + in + aux ofs c fs + + and field: type a b. (a, b) field -> b decode_bin = fun f -> t f.ftype + + and variant: type a. a variant -> a decode_bin = fun v buf ofs -> + (* FIXME: we support 'only' 256 variants *) + char buf ofs >>= fun (ofs, i) -> + case v.vcases.(int_of_char i) buf ofs + + and case: type a. a a_case -> a decode_bin = fun c buf ofs -> + match c with + | C0 c -> ok ofs c.c0 + | C1 c -> t c.ctype1 buf ofs >|= c.c1 + +end + +let map_result f = function + | Ok x -> Ok (f x) + | Error _ as e -> e + +let decode_bin ?(exact=true) t x = + let rec aux + : type a. a t -> string -> (a, [`Msg of string]) result + = fun t x -> match t with + | Like l when l.decode_bin = None -> aux l.x x |> map_result l.f + | Self s -> aux s.self x + | Prim (String _) -> Ok x + | Prim (Bytes _) -> Ok (Bytes.of_string x) + | _ -> + let last, v = Decode_bin.t t x 0 in + if exact then assert (last = String.length x); + Ok v + in + try aux t x + with Invalid_argument e -> Error (`Msg e) + +let pp t = + let rec aux: type a. a t -> a pp = fun t ppf x -> + match t with + | Self s -> aux s.self ppf x + | Like l -> like l ppf x + | Prim p -> prim p ppf x + | _ -> pp_json t ppf x + and like: type a b. (a, b) like -> b pp = fun l ppf x -> + match l.pp with + | None -> aux l.x ppf (l.g x) + | Some f -> f ppf x + and prim: type a. a prim -> a pp = fun t ppf x -> + match t with + | Unit -> () + | Bool -> Fmt.bool ppf x + | Char -> Fmt.char ppf x + | Int -> Fmt.int ppf x + | Int32 -> Fmt.int32 ppf x + | Int64 -> Fmt.int64 ppf x + | Float -> Fmt.float ppf x + | String _ -> Fmt.string ppf x + | Bytes _ -> Fmt.string ppf (Bytes.unsafe_to_string x) + in + aux t + +let to_string t = Fmt.to_to_string (pp t) + +let of_string t = + let v f x = try Ok (f x) with Invalid_argument e -> Error (`Msg e) in + let rec aux: type a a. a t -> a of_string = fun t x -> + match t with + | Self s -> aux s.self x + | Like l -> like l x + | Prim p -> prim p x + | _ -> of_json_string t x + and like: type a b. (a, b) like -> b of_string = fun l x -> + match l.of_string with + | None -> aux l.x x |> map_result l.f + | Some f -> f x + and prim: type a. a prim -> a of_string = fun t x -> + match t with + | Unit -> Ok () + | Bool -> v bool_of_string x + | Char -> v (fun x -> String.get x 1) x + | Int -> v int_of_string x + | Int32 -> v Int32.of_string x + | Int64 -> v Int64.of_string x + | Float -> v float_of_string x + | String _ -> Ok x + | Bytes _ -> Ok (Bytes.unsafe_of_string x) + in + aux t + +type 'a ty = 'a t + +let hash t x = match t with + | Like { hash = Some h; _ } -> h x + | _ -> Hashtbl.hash (encode_bin t x) + +module type S = sig + type t + val t: t ty +end diff --git a/vendors/irmin-lmdb/irmin_v2_type.mli b/vendors/irmin-lmdb/irmin_v2_type.mli new file mode 100644 index 000000000000..fad68d24b85e --- /dev/null +++ b/vendors/irmin-lmdb/irmin_v2_type.mli @@ -0,0 +1,144 @@ +(* + * Copyright (c) 2016-2017 Thomas Gazagnaire + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +type len = [ `Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int ] + +type 'a t +val unit: unit t +val bool: bool t +val char: char t +val int: int t +val int32: int32 t +val int64: int64 t +val float: float t +val string: string t +val bytes: bytes t +val list: ?len:len -> 'a t -> 'a list t +val array: ?len:len -> 'a t -> 'a array t +val option: 'a t -> 'a option t +val pair: 'a t -> 'b t -> ('a * 'b) t +val triple: 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t +val result: 'a t -> 'b t -> ('a, 'b) result t + +val string_of: len -> string t +val bytes_of: len -> bytes t + +type ('a, 'b) field +type ('a, 'b, 'c) open_record + +val field: string -> 'a t -> ('b -> 'a) -> ('b, 'a) field +val sealr: ('a, 'b, 'a) open_record -> 'a t + +val (|+): + ('a, 'b, 'c -> 'd) open_record -> ('a, 'c) field -> ('a, 'b, 'd) open_record + +val record: string -> 'b -> ('a, 'b, 'b) open_record + + +type ('a, 'b) case +type 'a case_p +type ('a, 'b, 'c) open_variant + +val case0: string -> 'a -> ('a, 'a case_p) case +val case1: string -> 'b t -> ('b -> 'a) -> ('a, 'b -> 'a case_p) case +val sealv: ('a, 'b, 'a -> 'a case_p) open_variant -> 'a t +val variant: string -> 'b -> ('a, 'b, 'b) open_variant + +val (|~): + ('a, 'b, 'c -> 'd) open_variant -> ('a, 'c) case -> ('a, 'b, 'd) open_variant + +val enum: string -> (string * 'a) list -> 'a t + +val mu: ('a t -> 'a t) -> 'a t +val mu2: ('a t -> 'b t -> 'a t * 'b t) -> 'a t * 'b t + +(* generics *) + +val equal: 'a t -> 'a -> 'a -> bool +val compare: 'a t -> 'a -> 'a -> int +val hash: 'a t -> 'a -> int + +(* CLI *) + +type 'a pp = 'a Fmt.t +type 'a to_string = 'a -> string +type 'a of_string = string -> ('a, [`Msg of string]) result + +val pp: 'a t -> 'a Fmt.t +val of_string: 'a t -> 'a of_string + +(* JSON (wire) *) + +module Json: sig + type decoder + val decoder: ?encoding:[< Jsonm.encoding ] -> [< Jsonm.src ] -> decoder + val decode: decoder -> + [> `Await | `End | `Error of Jsonm.error | `Lexeme of Jsonm.lexeme ] + val rewind: decoder -> Jsonm.lexeme -> unit +end + +type 'a encode_json = Jsonm.encoder -> 'a -> unit +type 'a decode_json = Json.decoder -> ('a, [`Msg of string]) result + +(* Raw (disk) *) + +type 'a encode_bin = bytes -> int -> 'a -> int +type 'a decode_bin = string -> int -> int * 'a +type 'a size_of = 'a -> [ `Size of int | `Buffer of string ] + +val size_of: 'a t -> 'a size_of +(* like *) + +val like: 'a t -> + ?cli:('b pp * 'b of_string) -> + ?json:('b encode_json * 'b decode_json) -> + ?bin:('b encode_bin * 'b decode_bin * 'b size_of) -> + ?equal:('b -> 'b -> bool) -> + ?compare:('b -> 'b -> int) -> + ?hash:('b -> int) -> + ('a -> 'b) -> ('b -> 'a) -> 'b t + +val like': + ?cli:('a pp * 'a of_string) -> + ?json:('a encode_json * 'a decode_json) -> + ?bin:('a encode_bin * 'a decode_bin * 'a size_of) -> + ?equal:('a -> 'a -> bool) -> + ?compare:('a -> 'a -> int) -> + ?hash:('a -> int) -> + 'a t -> 'a t + +(* convenient functions. *) + +val to_string: 'a t -> 'a -> string + +val pp_json: ?minify:bool -> 'a t -> 'a Fmt.t + +val encode_json: 'a t -> Jsonm.encoder -> 'a -> unit +val decode_json: 'a t -> Jsonm.decoder -> ('a, [`Msg of string]) result +val decode_json_lexemes: 'a t -> Jsonm.lexeme list -> ('a, [`Msg of string]) result + +val to_json_string: ?minify:bool -> 'a t -> 'a to_string +val of_json_string: 'a t -> 'a of_string + +val encode_bin: ?buf:bytes -> 'a t -> 'a to_string +val decode_bin: ?exact:bool -> 'a t -> 'a of_string + +type 'a ty = 'a t + +module type S = sig + type t + val t: t ty +end -- GitLab From 8e47e0025d8096abfd8942e93c71958fd50ea30c Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Wed, 30 Jan 2019 08:05:41 +0100 Subject: [PATCH 5/5] irmin-lmdb/GC: Use ints with variable width instead of hardcoding limits in the serialization format --- vendors/irmin-lmdb/irmin_lmdb.ml | 4 +- vendors/irmin-lmdb/irmin_v2_type.ml | 69 ++++++++++++++++++++-------- vendors/irmin-lmdb/irmin_v2_type.mli | 2 +- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/vendors/irmin-lmdb/irmin_lmdb.ml b/vendors/irmin-lmdb/irmin_lmdb.ml index 3e3ab95aea89..7d7c0b0fd95c 100644 --- a/vendors/irmin-lmdb/irmin_lmdb.ml +++ b/vendors/irmin-lmdb/irmin_lmdb.ml @@ -347,7 +347,7 @@ module Irmin_value_store |+ field "kind" (option metadata_v2_t) (function | { kind = `Node ; _ } -> None | { kind = `Contents m ; _ } -> Some m) - |+ field "name" (string_of `Int8) (fun { name ; _ } -> name) + |+ field "name" string (fun { name ; _ } -> name) |+ field "node" hash_v2_t (fun { node ; _ } -> node) |> sealr @@ -549,7 +549,7 @@ module Irmin_value_store let open Irmin_v2_type in record "entries" (fun v entries -> { version = Char.code v; entries }) |+ field "version" char (fun t -> Char.chr t.version) - |+ field "entries" (list ~len:`Int16 Val.entry_v2_t) (fun t -> t.entries) + |+ field "entries" (list Val.entry_v2_t) (fun t -> t.entries) |> sealr let version v = match Cstruct.get_uint8 v 0 with diff --git a/vendors/irmin-lmdb/irmin_v2_type.ml b/vendors/irmin-lmdb/irmin_v2_type.ml index 937189c49932..f929359a003e 100644 --- a/vendors/irmin-lmdb/irmin_v2_type.ml +++ b/vendors/irmin-lmdb/irmin_v2_type.ml @@ -66,7 +66,7 @@ module Json = struct end -type len = [`Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int] +type len = [ `Int | `Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int ] type 'a pp = 'a Fmt.t type 'a of_string = string -> ('a, [`Msg of string]) result @@ -234,13 +234,13 @@ let int = Prim Int let int32 = Prim Int32 let int64 = Prim Int64 let float = Prim Float -let string = Prim (String `Int64) -let bytes = Prim (Bytes `Int64) +let string = Prim (String `Int) +let bytes = Prim (Bytes `Int) let string_of n = Prim (String n) let bytes_of n = Prim (Bytes n) -let list ?(len=`Int64) v = List { v; len } -let array ?(len=`Int64) v = Array { v; len } +let list ?(len=`Int) v = List { v; len } +let array ?(len=`Int) v = Array { v; len } let pair a b = Tuple (Pair (a, b)) let triple a b c = Tuple (Triple (a, b, c)) let option a = Option a @@ -646,7 +646,7 @@ module Encode_json = struct match encode_json with | Some f -> f e u | None -> - let string = Prim (String `Int64) in + let string = Prim (String `Int) in match x, pp with | Prim _, Some pp -> t string e (Fmt.to_to_string pp u) | _ -> t x e (g u) @@ -772,7 +772,7 @@ module Decode_json = struct let char e = lexeme e >>= function - | `String s when String.length s = 1 -> Ok (String.get s 1) + | `String s when String.length s = 1 -> Ok (String.get s 0) | l -> error e l "`String[1]" let int32 e = float e >|= Int32.of_float @@ -836,7 +836,7 @@ module Decode_json = struct match decode_json with | Some d -> d e | None -> - let string = Prim (String `Int64) in + let string = Prim (String `Int) in match x, of_string with | Prim _, Some x -> t string e >|= x |> join | _ -> t x e >|= f @@ -924,32 +924,41 @@ let of_json_string x s = Decode_json.(t x @@ Json.decoder (`String s)) module Size_of = struct - let len = function + let int n = + let rec aux len n = + if n >= 0 && n < 128 then len + else aux (len+1) (n lsr 7) + in + `Size (aux 1 n) + + let size = function + | `Size s -> s + | `Buffer b -> String.length b + + let len n = function + | `Int -> size (int n) | `Int8 -> 1 | `Int16 -> 2 | `Int32 -> 4 | `Int64 -> 8 | `Fixed _ -> 0 - let size = function - | `Size s -> s - | `Buffer b -> String.length b - let unit () = `Size 0 let char (_:char) = `Size 1 let int32 (_:int32) = `Size 4 let int64 (_:int64) = `Size 8 - let int (_:int) = `Size 8 (* always use 64 bits for storing ints *) let bool (_:bool) = `Size 1 let float (_:float) = `Size 8 (* NOTE: we consider 'double' here *) - let string n s = `Size (len n + String.length s) - let bytes n s = `Size (len n + Bytes.length s) + let string n s = let s = String.length s in `Size (len s n + s) + let bytes n s = let s = Bytes.length s in `Size (len s n + s) let list l n x = - `Size (List.fold_left (fun acc x -> acc + size (l x)) (len n) x) + let init = len (List.length x) n in + `Size (List.fold_left (fun acc x -> acc + size (l x)) init x) let array l n x = - `Size (Array.fold_left (fun acc x -> acc + size (l x)) (len n) x) + let init = len (Array.length x) n in + `Size (Array.fold_left (fun acc x -> acc + size (l x)) init x) let pair a b (x, y) = `Size (size (a x) + size (b y)) let triple a b c (x, y, z) = `Size (size (a x) + size (b y) + size (c z)) @@ -1064,10 +1073,21 @@ module Encode_bin = struct let int32 buf ofs i = B.set_uint32 buf ofs i ; ofs + 4 let int64 buf ofs i = B.set_uint64 buf ofs i ; ofs + 8 let float buf ofs f = int64 buf ofs (Int64.bits_of_float f) - let int buf ofs i = int64 buf ofs (Int64.of_int i) let bool buf ofs b = char buf ofs (if b then '\255' else '\000') + let int buf ofs i = + let rec aux n ofs = + if n >= 0 && n < 128 then + int8 buf ofs n + else + let out = 128 + (n land 127) in + let ofs = int8 buf ofs out in + aux (n lsr 7) ofs + in + aux i ofs + let len n buf ofs i = match n with + | `Int -> int buf ofs i | `Int8 -> int8 buf ofs i | `Int16 -> int16 buf ofs i | `Int32 -> int32 buf ofs (Int32.of_int i) @@ -1212,10 +1232,19 @@ module Decode_bin = struct let int32 buf ofs = ok (ofs+4) (B.get_uint32 buf ofs) let int64 buf ofs = ok (ofs+8) (B.get_uint64 buf ofs) let bool buf ofs = char buf ofs >|= function '\000' -> false | _ -> true - let int buf ofs = int64 buf ofs >|= Int64.to_int let float buf ofs = int64 buf ofs >|= Int64.float_of_bits + let int buf ofs = + let rec aux n p ofs = + int8 buf ofs >>= fun (ofs, i) -> + let n = n + ((i land 127) lsl (p*7)) in + if i >= 0 && i < 128 then (ofs, n) + else aux n (p+1) ofs + in + aux 0 0 ofs + let len buf ofs = function + | `Int -> int buf ofs | `Int8 -> int8 buf ofs | `Int16 -> int16 buf ofs | `Int32 -> int32 buf ofs >|= Int32.to_int diff --git a/vendors/irmin-lmdb/irmin_v2_type.mli b/vendors/irmin-lmdb/irmin_v2_type.mli index fad68d24b85e..2cf98e53238a 100644 --- a/vendors/irmin-lmdb/irmin_v2_type.mli +++ b/vendors/irmin-lmdb/irmin_v2_type.mli @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -type len = [ `Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int ] +type len = [ `Int | `Int8 | `Int16 | `Int32 | `Int64 | `Fixed of int ] type 'a t val unit: unit t -- GitLab