From 54105f47ac8d8b09ecdc69eef24811a979397e31 Mon Sep 17 00:00:00 2001 From: Litchi Pi Date: Thu, 27 Jan 2022 11:44:51 +0100 Subject: [PATCH 1/2] lib_store: Allowing importing http served tar snapshots Signed-off-by: Litchi Pi --- src/lib_store/dune | 4 +- src/lib_store/snapshots.ml | 90 +++++++++++++++++++++++++++++++++---- src/lib_store/snapshots.mli | 2 +- 3 files changed, 85 insertions(+), 11 deletions(-) diff --git a/src/lib_store/dune b/src/lib_store/dune index 744e601258f2..391b4f62b631 100644 --- a/src/lib_store/dune +++ b/src/lib_store/dune @@ -19,7 +19,9 @@ camlzip tar tar-unix - prometheus) + prometheus + cohttp + cohttp-lwt-unix) (flags (:standard -open Tezos_shell_services diff --git a/src/lib_store/snapshots.ml b/src/lib_store/snapshots.ml index a4f8f13e49ae..d8dc99f8e288 100644 --- a/src/lib_store/snapshots.ml +++ b/src/lib_store/snapshots.ml @@ -605,13 +605,14 @@ let pp_snapshot_header ppf = function let version = function Current_header (version, _) -> version -type snapshot_format = Tar | Raw +type snapshot_format = Tar | Http | Raw let snapshot_format_encoding = Data_encoding.string_enum [("Tar", Tar); ("Raw", Raw)] let pp_snapshot_format ppf = function | Tar -> Format.fprintf ppf "tar (single file)" + | Http -> Format.fprintf ppf "tar (from HTTP)" | Raw -> Format.fprintf ppf "directory" (* To speed up the import of the cemented blocks we increase, @@ -2477,7 +2478,7 @@ end module type LOADER = sig type t - val load : string -> t Lwt.t + val load : string -> t tzresult Lwt.t val load_snapshot_header : t -> header tzresult Lwt.t @@ -2489,7 +2490,7 @@ module Raw_loader : LOADER = struct let load snapshot_path = let snapshot_dir = Naming.snapshot_dir ~snapshot_path () in - Lwt.return {snapshot_dir} + Lwt_result_syntax.return {snapshot_dir} let load_snapshot_version t = let open Lwt_result_syntax in @@ -2539,7 +2540,7 @@ module Tar_loader : LOADER = struct snapshot_dir in let* tar = Onthefly.open_in ~file:(Naming.file_path snapshot_file) in - 
Lwt.return {tar; snapshot_file; snapshot_tar} + Lwt_result_syntax.return {tar; snapshot_file; snapshot_tar} let load_snapshot_version t = let open Lwt_tzresult_syntax in @@ -2590,6 +2591,60 @@ module Tar_loader : LOADER = struct let close t = Onthefly.close_in t.tar end +module Http_loader : LOADER = struct + open Cohttp_lwt + open Cohttp_lwt_unix + + type t = { + url : Uri.t; + tmp_dir : string; + tar_t : Tar_loader.t; + } + + let test_is_http str_url = + match Uri.scheme str_url with + | Some url -> (match String.lowercase_ascii url with + | "http" -> true + | "https" -> true + | _ -> false + ) + | None -> false + + let download_tar_file url dest_dir = + let open Lwt_result_syntax in + let* fname = match List.last_opt (Str.split (Str.regexp "/+") (Uri.path url)) with + | Some d -> return d + | None -> failwith "Cannot access to filename from URL" + in + let dest = dest_dir ^ fname in + let*! _resp, body = Client.get url in + let stream = Body.to_stream body in + let*! () = Lwt_io.with_file ~mode:Lwt_io.output dest (fun chan -> + Lwt_stream.iter_s (Lwt_io.write chan) stream) in + return dest + + let load uri_string = + let open Lwt_result_syntax in + let url = Uri.of_string uri_string in + if not (test_is_http url) then + failwith "Input is not a valid HTTP URL" + else + let tmp_dir = Filename.temp_file "tzsnapshot" "" in + let*! () = Lwt_unix.unlink tmp_dir in + let*! 
() = Lwt_unix.mkdir tmp_dir 0o700 in + let* fname = download_tar_file url tmp_dir in + let* tar_t = Tar_loader.load fname in + return {url; tmp_dir; tar_t} + + let load_snapshot_header t = + Tar_loader.load_snapshot_header t.tar_t + + let close t = + let open Lwt_syntax in + let* () = Tar_loader.close t.tar_t in + Lwt_unix.rmdir t.tmp_dir +end + module type Snapshot_loader = sig type t @@ -2604,13 +2659,13 @@ module Make_snapshot_loader (Loader : LOADER) : Snapshot_loader = struct let close = Loader.close let load_snapshot_header ~snapshot_path = - let open Lwt_syntax in + let open Lwt_result_syntax in let* loader = load snapshot_path in trace (Wrong_snapshot_file {filename = snapshot_path}) @@ protect (fun () -> Loader.load_snapshot_header loader) ~on_error:(fun err -> - let* () = close loader in + let*! () = close loader in Lwt.return_error err) end @@ -3604,7 +3659,17 @@ end snapshot. We assume that a snapshot is valid if the medata can be read. *) let snapshot_file_kind ~snapshot_path = - let open Lwt_tzresult_syntax in + let open Lwt_result_syntax in + let is_valid_remote_snapshot file = + let (module Loader) = + (module Make_snapshot_loader (Http_loader) : Snapshot_loader) + in + Lwt.catch + (fun () -> + Loader.load_snapshot_header ~snapshot_path:(Naming.file_path file) + >>=? 
fun _header -> return true) + fail_with_exn + in let is_valid_uncompressed_snapshot file = let (module Loader) = (module Make_snapshot_loader (Tar_loader) : Snapshot_loader) @@ -3633,14 +3698,18 @@ let snapshot_file_kind ~snapshot_path = let* () = is_valid_raw_snapshot snapshot_dir in return Raw else + let open Lwt_result_syntax in let snapshot_file = Naming.snapshot_file ~snapshot_filename:(Filename.basename snapshot_path) Naming.( snapshot_dir ~snapshot_path:(Filename.dirname snapshot_path) ()) in - let* () = is_valid_uncompressed_snapshot snapshot_file in - return Tar) + let* valid_remote_snapshot = is_valid_remote_snapshot snapshot_file in + if valid_remote_snapshot then return Http + else + let* () = is_valid_uncompressed_snapshot snapshot_file in + return Tar) let export ?snapshot_path export_format ?rolling ~block ~store_dir ~context_dir ~chain_name genesis = @@ -3648,6 +3717,7 @@ let export ?snapshot_path export_format ?rolling ~block ~store_dir ~context_dir match export_format with | Tar -> (module Make_snapshot_exporter (Tar_exporter) : Snapshot_exporter) | Raw -> (module Make_snapshot_exporter (Raw_exporter) : Snapshot_exporter) + | Http -> (module Make_snapshot_exporter (Tar_exporter) : Snapshot_exporter) in Exporter.export ?snapshot_path @@ -3665,6 +3735,7 @@ let read_snapshot_header ~snapshot_path = match kind with | Tar -> (module Make_snapshot_loader (Tar_loader) : Snapshot_loader) | Raw -> (module Make_snapshot_loader (Raw_loader) : Snapshot_loader) + | Http -> (module Make_snapshot_loader (Http_loader) : Snapshot_loader) in let* (version, metadata) = Loader.load_snapshot_header ~snapshot_path in return (Current_header (version, metadata)) @@ -3677,6 +3748,7 @@ let import ~snapshot_path ?patch_context ?block ?check_consistency let (module Importer) = match kind with | Tar -> (module Make_snapshot_importer (Tar_importer) : Snapshot_importer) + | Http -> (module Make_snapshot_importer (Tar_importer) : Snapshot_importer) | Raw -> (module 
Make_snapshot_importer (Raw_importer) : Snapshot_importer) in let dst_store_dir = Naming.store_dir ~dir_path:dst_store_dir in diff --git a/src/lib_store/snapshots.mli b/src/lib_store/snapshots.mli index 98eb4e64793d..36d995225b96 100644 --- a/src/lib_store/snapshots.mli +++ b/src/lib_store/snapshots.mli @@ -142,7 +142,7 @@ type error += (** Current version of snapshots *) val current_version : int -type snapshot_format = Tar | Raw +type snapshot_format = Tar | Http | Raw val pp_snapshot_format : Format.formatter -> snapshot_format -> unit -- GitLab From b15854c57adc5d41d842f006f7872e2e6bff0275 Mon Sep 17 00:00:00 2001 From: Litchi Pi Date: Mon, 31 Jan 2022 16:07:28 +0100 Subject: [PATCH 2/2] Debugging / improving v1 Signed-off-by: Litchi Pi --- src/bin_node/node_snapshot_command.ml | 25 +++++-- src/lib_store/snapshots.ml | 94 +++++++++++++-------------- src/lib_store/snapshots.mli | 2 + 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/src/bin_node/node_snapshot_command.ml b/src/bin_node/node_snapshot_command.ml index 9a76969848ac..4f81d95ab3dc 100644 --- a/src/bin_node/node_snapshot_command.ml +++ b/src/bin_node/node_snapshot_command.ml @@ -113,9 +113,7 @@ end module Term = struct type subcommand = Export | Import | Info - let check_snapshot_path = function - | None -> fail Missing_file_argument - | Some path -> + let check_snapshot_path path = if Sys.file_exists path then return path else fail (Cannot_locate_file path) @@ -159,17 +157,28 @@ module Term = struct ~block genesis | Import -> + Format.printf "Trying to import\n"; let data_dir = Option.value args.data_dir ~default:Node_config_file.default_data_dir in + Format.printf "check if datadir exist\n"; let*! 
existing_data_dir = Lwt_unix.file_exists data_dir in let* node_config = Node_shared_arg.read_and_patch_config_file args in let ({genesis; _} : Node_config_file.blockchain_network) = node_config.blockchain_network in - let* snapshot_path = check_snapshot_path snapshot_path in + Format.printf "Check if path is url\n"; + let* snapshot_path = match snapshot_path with + | Some path -> ( + let path_is_url = Snapshots.check_is_url path in + if path_is_url then return path + else check_snapshot_path path + ) + | None -> fail Missing_file_argument + in + Format.printf "Creating dir cleaner function\n"; let dir_cleaner () = let*! () = Event.(emit cleaning_up_after_failure) data_dir in if existing_data_dir then @@ -208,6 +217,7 @@ module Term = struct let patch_context = Patch_context.patch_context genesis sandbox_parameters in + Format.printf "Starting protect\n"; let* () = protect ~on_error:(fun err -> @@ -224,6 +234,7 @@ module Term = struct | None -> return_none in let check_consistency = not disable_check in + Format.printf "Importing snapshot\n"; Snapshots.import ~snapshot_path ~patch_context @@ -251,7 +262,10 @@ module Term = struct node_config.blockchain_network.user_activated_protocol_overrides else return_unit | Info -> - let* snapshot_path = check_snapshot_path snapshot_path in + let* snapshot_path = match snapshot_path with + | Some path -> check_snapshot_path path + | None -> fail Missing_file_argument + in let* snapshot_header = Snapshots.read_snapshot_header ~snapshot_path in @@ -325,6 +339,7 @@ module Term = struct and printer ppf = function | Snapshots.Tar -> Format.fprintf ppf "tar" | Raw -> Format.fprintf ppf "raw" + | Snapshots.Http -> Format.fprintf ppf "tar" in let open Cmdliner.Arg in let doc = diff --git a/src/lib_store/snapshots.ml b/src/lib_store/snapshots.ml index d8dc99f8e288..d801da78635c 100644 --- a/src/lib_store/snapshots.ml +++ b/src/lib_store/snapshots.ml @@ -2601,21 +2601,11 @@ module Http_loader : LOADER = struct tar_t : Tar_loader.t; } - 
let test_is_http str_url = - match Uri.scheme str_url with - | Some url -> (match String.lowercase_ascii url with - | "http" -> true - | "https" -> true - | _ -> false - ) - | None -> false - let download_tar_file url dest_dir = let open Lwt_result_syntax in - let* fname = match List.last_opt (Str.split (Str.regexp "/+") (Uri.path url)) with - | Some d -> return d - | None -> failwith "Cannot access to filename from URL" - in + Format.printf "Downloading file in dir %s" dest_dir; + let fname = Filename.basename (Uri.path url) in + Format.printf "Downloading file to %s" fname; let dest = dest_dir ^ fname in let*! _resp, body = Client.get url in let stream = Body.to_stream body in @@ -2626,21 +2616,22 @@ module Http_loader : LOADER = struct let load uri_string = let open Lwt_result_syntax in let url = Uri.of_string uri_string in - if not (test_is_http url) then - failwith "Input is not a valid HTTP URL" - else - let tmp_dir = Filename.temp_file "tzsnapshot" "" in - let*! () = Lwt_unix.unlink tmp_dir in - let*! () = Lwt_unix.mkdir tmp_dir 0o700 in - let* fname = download_tar_file url tmp_dir in - let* tar_t = Tar_loader.load fname in - return {url; tmp_dir; tar_t} + Format.printf "Loading url %s" uri_string ; + let tmp_dir = Filename.temp_file "tzsnapshot" "" in + Format.printf "Created temporary dir %s" tmp_dir; + let*! () = Lwt_unix.unlink tmp_dir in + let*! 
() = Lwt_unix.mkdir tmp_dir 0o700 in + let* fname = download_tar_file url tmp_dir in + let* tar_t = Tar_loader.load fname in + return {url; tmp_dir; tar_t} let load_snapshot_header t = + Format.printf "Load snapshot header\n"; Tar_loader.load_snapshot_header t.tar_t let close t = let open Lwt_syntax in + Format.printf "Close snapshot\n"; let* () = Tar_loader.close t.tar_t in Lwt_unix.rmdir t.tmp_dir end @@ -3514,6 +3505,7 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct (genesis : Genesis.t) = let open Lwt_tzresult_syntax in let chain_id = Chain_id.of_block_hash genesis.Genesis.block in + Format.printf "Init importer\n"; let*! snapshot_importer = init ~snapshot_path ~dst_store_dir chain_id in let dst_store_dir = Naming.dir_path dst_store_dir in let* () = @@ -3526,6 +3518,7 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct let chain_id = Chain_id.of_block_hash genesis.block in let dst_chain_dir = Naming.chain_dir dst_store_dir chain_id in let dst_cemented_dir = Naming.cemented_blocks_dir dst_chain_dir in + Format.printf "Create directories\n"; (* Create directories *) let*! () = List.iter_s @@ -3542,6 +3535,7 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct (Sys.file_exists snapshot_path) (Snapshot_file_not_found snapshot_path) in + Format.printf "Load snapshot header"; let* snapshot_header = Importer.load_snapshot_header snapshot_importer in let (_, snapshot_metadata) = snapshot_header in let* () = @@ -3655,21 +3649,20 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct return_unit end +let check_is_url path = + match Uri.scheme (Uri.of_string path) with + | Some url -> (match String.lowercase_ascii url with + | "http" -> true + | "https" -> true + | _ -> false + ) + | None -> false + (* [snapshot_file_kind ~snapshot_path] returns the kind of a snapshot. We assume that a snapshot is valid if the medata can be read. 
*) let snapshot_file_kind ~snapshot_path = let open Lwt_result_syntax in - let is_valid_remote_snapshot file = - let (module Loader) = - (module Make_snapshot_loader (Http_loader) : Snapshot_loader) - in - Lwt.catch - (fun () -> - Loader.load_snapshot_header ~snapshot_path:(Naming.file_path file) - >>=? fun _header -> return true) - fail_with_exn - in let is_valid_uncompressed_snapshot file = let (module Loader) = (module Make_snapshot_loader (Tar_loader) : Snapshot_loader) @@ -3692,24 +3685,26 @@ let snapshot_file_kind ~snapshot_path = return_unit) in protect (fun () -> - let*! is_dir = Lwt_utils_unix.is_directory snapshot_path in - if is_dir then - let snapshot_dir = Naming.snapshot_dir ~snapshot_path () in - let* () = is_valid_raw_snapshot snapshot_dir in - return Raw - else - let open Lwt_result_syntax in - let snapshot_file = + if check_is_url snapshot_path + then return Http + else ( + let*! is_dir = Lwt_utils_unix.is_directory snapshot_path in + if is_dir then + let snapshot_dir = Naming.snapshot_dir ~snapshot_path () in + let* () = is_valid_raw_snapshot snapshot_dir in + return Raw + else ( + let snapshot_file = Naming.snapshot_file - ~snapshot_filename:(Filename.basename snapshot_path) - Naming.( - snapshot_dir ~snapshot_path:(Filename.dirname snapshot_path) ()) - in - let* valid_remote_snapshot = is_valid_remote_snapshot snapshot_file in - if valid_remote_snapshot then return Http - else + ~snapshot_filename:(Filename.basename snapshot_path) + Naming.( + snapshot_dir ~snapshot_path:(Filename.dirname snapshot_path) ()) + in let* () = is_valid_uncompressed_snapshot snapshot_file in - return Tar) + return Tar + ) + ) + ) let export ?snapshot_path export_format ?rolling ~block ~store_dir ~context_dir ~chain_name genesis = @@ -3729,6 +3724,7 @@ let export ?snapshot_path export_format ?rolling ~block ~store_dir ~context_dir genesis let read_snapshot_header ~snapshot_path = + Format.printf "Read snapshot header\n"; let open Lwt_tzresult_syntax in let* kind = 
snapshot_file_kind ~snapshot_path in let (module Loader) = @@ -3745,12 +3741,14 @@ let import ~snapshot_path ?patch_context ?block ?check_consistency ~user_activated_protocol_overrides genesis = let open Lwt_tzresult_syntax in let* kind = snapshot_file_kind ~snapshot_path in + Format.printf "Getting Importer module from snapshot kind\n"; let (module Importer) = match kind with | Tar -> (module Make_snapshot_importer (Tar_importer) : Snapshot_importer) | Http -> (module Make_snapshot_importer (Tar_importer) : Snapshot_importer) | Raw -> (module Make_snapshot_importer (Raw_importer) : Snapshot_importer) in + Format.printf "Store dir: %s\n" dst_store_dir; let dst_store_dir = Naming.store_dir ~dir_path:dst_store_dir in Importer.import ~snapshot_path diff --git a/src/lib_store/snapshots.mli b/src/lib_store/snapshots.mli index 36d995225b96..7d0f4d781c61 100644 --- a/src/lib_store/snapshots.mli +++ b/src/lib_store/snapshots.mli @@ -208,3 +208,5 @@ val import : returns its kind. Returns [Invalid] if it is a wrong snapshot file. *) val snapshot_file_kind : snapshot_path:string -> snapshot_format tzresult Lwt.t + +val check_is_url : string -> bool -- GitLab