From 311099798f887f754c540beacfddc2a57dddc165 Mon Sep 17 00:00:00 2001 From: Neo <11726174-neo.nomadic@users.noreply.gitlab.com> Date: Tue, 3 Jun 2025 20:52:22 +0200 Subject: [PATCH] CI: Add GitLab Runner Tags for Very High CPU with RAMFS Support --- ci/bin/code_verification.ml | 26 ++++++---- ci/bin/common.ml | 27 ++++++++--- ci/bin/master_branch.ml | 21 ++++++-- ci/bin/release_tag.ml | 6 ++- ci/lib_tezos_ci/tezos_ci.ml | 94 +++++++++++++++++++++++++----------- ci/lib_tezos_ci/tezos_ci.mli | 12 ++++- teztale/ci/common.ml | 3 +- teztale/ci/release.ml | 7 +-- 8 files changed, 143 insertions(+), 53 deletions(-) diff --git a/ci/bin/code_verification.ml b/ci/bin/code_verification.ml index 878fd55a8d76..b30b746acd24 100644 --- a/ci/bin/code_verification.ml +++ b/ci/bin/code_verification.ml @@ -685,6 +685,7 @@ let jobs pipeline_type = job_build_static_binaries ~__POS__ ~arch:Arm64 + ~storage:Ramfs ~dependencies:dependencies_needs_start (* See rationale above *) ~rules:(make_rules ~manual:(On_changes changeset_octez) ()) () @@ -723,6 +724,7 @@ let jobs pipeline_type = job ~__POS__ ~arch:Arm64 + ~storage:Ramfs ~name:("etherlink.build:static-" ^ arch_to_string Arm64) ~image:Images.CI.build ~stage:Stages.build @@ -845,10 +847,11 @@ let jobs pipeline_type = ~rules:[job_rule ~when_:Always ()] () in - let job_build_teztale ?cpu ~arch () = + let job_build_teztale ?cpu ~arch ?storage () = Teztale.Common.job_build ~arch ?cpu + ?storage ~rules:(make_rules ~manual:Yes ~changes:Teztale.Common.changeset ()) () in @@ -867,7 +870,7 @@ let jobs pipeline_type = build_octez_source; job_build_grafazos; job_build_teztale ~arch:Amd64 ~cpu:Very_high (); - job_build_teztale ~arch:Arm64 (); + job_build_teztale ~arch:Arm64 ~storage:Ramfs (); job_evm_static_x86_64_experimental; job_evm_static_arm64_experimental; job_build_layer1_profiling (); @@ -1070,7 +1073,7 @@ let jobs pipeline_type = make_rules ~changes:changeset_octez ~dependent:true () in let job_unit_test ~__POS__ ?(image = Images.CI.build) ?timeout - ?parallel_vector ?(rules = rules) ~arch ?(cpu = Normal) ~name + ?parallel_vector ?(rules = rules) ~arch ?(cpu = Normal) ?storage ~name ~make_targets () : tezos_job = let arch_string = arch_to_string arch in let script = ["make $MAKE_TARGETS"] in @@ -1105,6 +1108,7 @@ let jobs pipeline_type = ~image ~arch ~cpu + ?storage ~dependencies ~rules ~variables @@ -1172,6 +1176,7 @@ let jobs pipeline_type = job_unit_test ~__POS__ ~name:"oc.unit:non-proto-arm64" + ~storage:Ramfs ~parallel_vector:2 ~arch:Arm64 (* The [lib_benchmark] unit tests require Python *) ~image:Images.CI.test @@ -1214,11 +1219,12 @@ let jobs pipeline_type = in (* "de" stands for data-encoding, since data-encoding is considered to be a separate product. *) - let de_unit arch = + let de_unit arch ?storage () = job ~__POS__ ~name:("de.unit:" ^ arch_to_string arch) ~arch + ?storage ~image:Images.CI.test ~stage:Stages.test ~rules: @@ -1232,11 +1238,12 @@ let jobs pipeline_type = ~before_script:(before_script ~eval_opam:true []) ["dune runtest data-encoding"] in - let resto_unit arch = + let resto_unit arch ?storage () = job ~__POS__ ~name:("resto.unit:" ^ arch_to_string arch) ~arch + ?storage ~image:Images.CI.test ~stage:Stages.test ~rules: @@ -1256,10 +1263,10 @@ let jobs pipeline_type = oc_unit_non_proto_arm64; oc_unit_webassembly_x86_64; oc_unit_protocol_compiles; - de_unit Amd64; - de_unit Arm64; - resto_unit Amd64; - resto_unit Arm64; + de_unit Amd64 (); + de_unit Arm64 ~storage:Ramfs (); + resto_unit Amd64 (); + resto_unit Arm64 ~storage:Ramfs (); ] in let job_oc_integration_compiler_rejections : tezos_job = @@ -2086,6 +2093,7 @@ let jobs pipeline_type = job_docker_build ~__POS__ ~arch:Arm64 + ~storage:Ramfs ~dependencies:(Dependent []) ~rules:(make_rules ~changes:changeset_docker_files ~manual:Yes ()) Test_manual diff --git a/ci/bin/common.ml b/ci/bin/common.ml index 1bee1ae0568f..7dc76f5e7071 100644 --- a/ci/bin/common.ml +++ b/ci/bin/common.ml @@ -583,7 +583,7 @@ let changeset_mir_tzt = (no need to test that we pass the -static flag twice) - released variants exist, that are used in release tag pipelines (they do not build experimental executables) *) -let job_build_static_binaries ~__POS__ ~arch ?(cpu = Normal) +let job_build_static_binaries ~__POS__ ~arch ?(cpu = Normal) ?storage ?(executable_files = "script-inputs/octez-released-executables") ?(experimental_executables = "script-inputs/octez-experimental-executables") ?version_executable ?(release = false) ?rules ?dependencies ?retry () : @@ -611,6 +611,7 @@ let job_build_static_binaries ~__POS__ ~arch ?(cpu = Normal) ~stage:Stages.build ~arch ~cpu + ?storage ~name ?retry ~image:Images.CI.build @@ -653,8 +654,8 @@ type docker_build_type = | Test_manual (** Creates a Docker build job of the given [arch] and [docker_build_type]. *) -let job_docker_build ?rules ?dependencies ~__POS__ ~arch docker_build_type : - tezos_job = +let job_docker_build ?rules ?dependencies ~__POS__ ~arch ?storage + docker_build_type : tezos_job = let arch_string = arch_to_string_alt arch in let ci_docker_hub = match docker_build_type with @@ -709,6 +710,7 @@ let job_docker_build ?rules ?dependencies ~__POS__ ~arch docker_build_type : ~__POS__ ~stage ~arch + ?storage ~name ~variables ["./scripts/ci/docker_release.sh"] @@ -741,7 +743,7 @@ type bin_package_group = A | B let bin_package_image = Image.mk_external ~image_path:"$DISTRIBUTION" -let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?retry ?cpu +let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?retry ?cpu ?storage ?(release = false) ?dependencies () = let arch_string = arch_to_string arch in let name = @@ -804,6 +806,7 @@ let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?retry ?cpu ~arch ?retry ?cpu + ?storage ~name ~image:Images.CI.build ~before_script: @@ -823,10 +826,22 @@ let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?retry ?cpu (** {2 Shared jobs} *) let job_build_arm64_release ?rules () : tezos_job = - job_build_dynamic_binaries ?rules ~__POS__ ~arch:Arm64 ~release:true () + job_build_dynamic_binaries + ?rules + ~__POS__ + ~arch:Arm64 + ~storage:Ramfs + ~release:true + () let job_build_arm64_exp_dev_extra ?rules () : tezos_job = - job_build_dynamic_binaries ?rules ~__POS__ ~arch:Arm64 ~release:false () + job_build_dynamic_binaries + ?rules + ~__POS__ + ~arch:Arm64 + ~storage:Ramfs + ~release:false + () let job_build_kernels ?rules () : tezos_job = job diff --git a/ci/bin/master_branch.ml b/ci/bin/master_branch.ml index c5573e1b90f3..c0eaad7178f8 100644 --- a/ci/bin/master_branch.ml +++ b/ci/bin/master_branch.ml @@ -24,7 +24,12 @@ let rules_always = [job_rule ~when_:Always ()] (* static binaries *) let job_static_arm64 = - job_build_static_binaries ~__POS__ ~arch:Arm64 ~rules:rules_always () + job_build_static_binaries + ~__POS__ + ~arch:Arm64 + ~storage:Ramfs + ~rules:rules_always + () let job_static_x86_64 = job_build_static_binaries @@ -65,7 +70,12 @@ let octez_distribution_docker_jobs = job_docker_build ~__POS__ ~rules:rules_always ~arch:Amd64 Experimental in let job_docker_arm64_experimental : tezos_job = - job_docker_build ~__POS__ ~rules:rules_always ~arch:Arm64 Experimental + job_docker_build + ~__POS__ + ~rules:rules_always + ~arch:Arm64 + ~storage:Ramfs + Experimental in let job_docker_merge_manifests = job_docker_merge_manifests @@ -98,7 +108,12 @@ let jobs = {{:https://docs.gitlab.com/ee/ci/jobs/job_troubleshooting.html#jobs-or-pipelines-run-unexpectedly-when-using-changes} GitLab Docs: Jobs or pipelines run unexpectedly when using changes}. *) let job_static_arm64 = - job_build_static_binaries ~__POS__ ~arch:Arm64 ~rules:rules_always () + job_build_static_binaries + ~__POS__ + ~arch:Arm64 + ~storage:Ramfs + ~rules:rules_always + () in let job_static_x86_64 = job_build_static_binaries diff --git a/ci/bin/release_tag.ml b/ci/bin/release_tag.ml index 1bdafd3e931c..7df666415985 100644 --- a/ci/bin/release_tag.ml +++ b/ci/bin/release_tag.ml @@ -53,7 +53,7 @@ let monitoring_child_pipeline = job_datadog_pipeline_trace; Grafazos_ci.Common.job_build_grafazos (); job_build_layer1_profiling ~expire_in:Never (); - Teztale.Common.job_build ~expire_in:Never ~arch:Arm64 (); + Teztale.Common.job_build ~expire_in:Never ~arch:Arm64 ~storage:Ramfs (); Teztale.Common.job_build ~expire_in:Never ~arch:Amd64 (); ] @@ -113,6 +113,7 @@ let octez_jobs ?(test = false) release_tag_pipeline_type = ~dependencies:(Dependent []) ~__POS__ ~arch:Arm64 + ~storage:Ramfs (if test then Test else Release) in let job_docker_merge = @@ -129,6 +130,7 @@ let octez_jobs ?(test = false) release_tag_pipeline_type = ~dependencies:(Dependent []) ~__POS__ ~arch:Arm64 + ~storage:Ramfs ~release:true () in @@ -304,6 +306,7 @@ let octez_evm_node_jobs ?(test = false) () = job_docker_build ~__POS__ ~arch:Arm64 + ~storage:Ramfs (if test then Test else Octez_evm_node_release) in let job_docker_merge = @@ -329,6 +332,7 @@ let octez_evm_node_jobs ?(test = false) () = job_build_static_binaries ~__POS__ ~arch:Arm64 + ~storage:Ramfs ~executable_files:"script-inputs/octez-evm-node-executable" ~release:true ~version_executable:"octez-evm-node" diff --git a/ci/lib_tezos_ci/tezos_ci.ml b/ci/lib_tezos_ci/tezos_ci.ml index f9932b219668..290d64db62c4 100644 --- a/ci/lib_tezos_ci/tezos_ci.ml +++ b/ci/lib_tezos_ci/tezos_ci.ml @@ -736,6 +736,8 @@ type tag = | Gcp_high_cpu_dev | Gcp_very_high_cpu | Gcp_very_high_cpu_dev + | Gcp_very_high_cpu_ramfs + | Gcp_very_high_cpu_ramfs_dev | Aws_specific | Dynamic @@ -750,6 +752,8 @@ let string_of_tag = function | Gcp_high_cpu_dev -> "gcp_high_cpu_dev" | Gcp_very_high_cpu -> "gcp_very_high_cpu" | Gcp_very_high_cpu_dev -> "gcp_very_high_cpu_dev" + | Gcp_very_high_cpu_ramfs -> "gcp_very_high_cpu_ramfs" + | Gcp_very_high_cpu_ramfs_dev -> "gcp_very_high_cpu_ramfs_dev" | Aws_specific -> "aws_specific" | Dynamic -> Gitlab_ci.Var.encode dynamic_tag_var @@ -757,7 +761,8 @@ let string_of_tag = function let arch_of_tag = function | Gcp_arm64 | Gcp_dev_arm64 -> Some Arm64 | Gcp | Gcp_dev | Gcp_tezt | Gcp_tezt_dev | Gcp_high_cpu | Gcp_high_cpu_dev - | Gcp_very_high_cpu | Gcp_very_high_cpu_dev | Aws_specific -> + | Gcp_very_high_cpu | Gcp_very_high_cpu_dev | Gcp_very_high_cpu_ramfs + | Gcp_very_high_cpu_ramfs_dev | Aws_specific -> Some Amd64 | Dynamic -> None @@ -819,6 +824,11 @@ type cpu = | High (** Target GCP high runner pool. *) | Very_high (** Target GCP very high runner pool. *) +(** The list of storage profiling tags for runners. *) +type storage = + | Network (** Target default storage runner pool. *) + | Ramfs (** Target ramfs storage runner pool. *) + let enc_git_strategy = function | Fetch -> "fetch" | Clone -> "clone" @@ -827,36 +837,58 @@ let enc_git_strategy = function let job ?arch ?after_script ?allow_failure ?artifacts ?(before_script = []) ?cache ?id_tokens ?interruptible ?(dependencies = Staged []) ?(image_dependencies = []) ?services ?variables ?rules - ?(timeout = Gitlab_ci.Types.Minutes 60) ?tag ?(cpu = Normal) ?git_strategy - ?coverage ?retry ?parallel ?description ?(dev_infra = false) ~__POS__ ?image - ?template ~stage ~name script : tezos_job = + ?(timeout = Gitlab_ci.Types.Minutes 60) ?tag ?(cpu = Normal) + ?(storage = Network) ?git_strategy ?coverage ?retry ?parallel ?description + ?(dev_infra = false) ~__POS__ ?image ?template ~stage ~name script : + tezos_job = (* The tezos/tezos CI uses singleton tags for its runners. *) let tag = - match (arch, tag, cpu) with - | Some Arm64, _, (High | Very_high) -> + match (arch, tag, cpu, storage) with + | Some Arm64, _, (High | Very_high), _ -> failwith "[job] cannot specify both [arch=Arm64] and [cpu=High] or \ [cpu=Very_high] in job '%s'." name - | Some _, Some _, High - | None, Some _, High - | Some _, None, High - | None, None, High -> + | Some Arm64, _, _, Network -> + failwith + "[job] cannot specify both [arch=Arm64] and [storage=Network] in job \ + '%s'." + name + | None, None, Normal, Ramfs -> + failwith + "[job] cannot specify both [cpu=Normal] and [storage=Ramfs] in job \ + '%s'." + name + | None, _, High, Ramfs | Some _, _, High, Ramfs -> + failwith + "[job] cannot specify both [cpu=High] and [storage=Ramfs] in job \ + '%s'." + name + | Some _, Some _, High, Network + | None, Some _, High, Network + | Some _, None, High, Network + | None, None, High, Network -> if dev_infra then Gcp_high_cpu_dev else Gcp_high_cpu - | Some _, Some _, Very_high - | None, Some _, Very_high - | Some _, None, Very_high - | None, None, Very_high -> + | Some _, Some _, Very_high, Network + | None, Some _, Very_high, Network + | Some _, None, Very_high, Network + | None, None, Very_high, Network -> if dev_infra then Gcp_very_high_cpu_dev else Gcp_very_high_cpu - | Some arch, None, Normal -> ( + | Some _, Some _, Very_high, Ramfs + | None, Some _, Very_high, Ramfs + | Some _, None, Very_high, Ramfs + | None, None, Very_high, Ramfs -> + if dev_infra then Gcp_very_high_cpu_ramfs_dev + else Gcp_very_high_cpu_ramfs + | Some arch, None, Normal, _ -> ( match arch with | Amd64 -> if dev_infra then Gcp_dev else Gcp | Arm64 -> Gcp_arm64) - | None, Some tag, Normal -> tag - | None, None, Normal -> + | None, Some tag, _, _ -> tag + | None, None, Normal, Network -> (* By default, we assume Amd64 runners as given by the [gcp] tag. *) Gcp - | Some _, Some _, Normal -> + | Some _, Some _, Normal, _ -> failwith "[job] cannot specify both [arch] and [tags] at the same time in job \ '%s'." @@ -1398,8 +1430,8 @@ let opt_var name f = function Some value -> [(name, f value)] | None -> [] [CI_DOCKER_AUTH] contains the appropriate credentials. *) let job_docker_authenticated ?(skip_docker_initialization = false) ?ci_docker_hub ?artifacts ?(variables = []) ?rules ?dependencies - ?image_dependencies ?arch ?tag ?allow_failure ?parallel ?timeout ?retry - ?description ?dev_infra ~__POS__ ~stage ~name script : tezos_job = + ?image_dependencies ?arch ?storage ?tag ?allow_failure ?parallel ?timeout + ?retry ?description ?dev_infra ~__POS__ ~stage ~name script : tezos_job = let docker_version = "24.0.7" in job ?rules @@ -1407,6 +1439,7 @@ let job_docker_authenticated ?(skip_docker_initialization = false) ?image_dependencies ?artifacts ?arch + ?storage ?tag ?allow_failure ?parallel @@ -1514,10 +1547,11 @@ module Images = struct let rust_toolchain = (* The job that builds the rust_toolchain image. This job is automatically included in any pipeline that uses this image. *) - let image_builder arch = + let image_builder arch ?storage () = job_docker_authenticated ~__POS__ ~arch + ?storage ~skip_docker_initialization:true ~stage ~name:("oc.docker:rust-toolchain:" ^ arch_to_string_alt arch) @@ -1534,8 +1568,8 @@ module Images = struct "${rust_toolchain_image_name}:${rust_toolchain_image_tag}" in Image.mk_internal - ~image_builder_amd64:(image_builder Amd64) - ~image_builder_arm64:(image_builder Arm64) + ~image_builder_amd64:(image_builder Amd64 ()) + ~image_builder_arm64:(image_builder Arm64 ~storage:Ramfs ()) ~image_path () @@ -1543,10 +1577,11 @@ module Images = struct let rust_sdk_bindings = (* The job that builds the rust-sdk-bindings image. This job is automatically included in any pipeline that uses this image. *) - let image_builder arch = + let image_builder arch ?storage () = job_docker_authenticated ~__POS__ ~arch + ?storage ~stage ~name:("oc.docker:rust-sdk-bindings:" ^ arch_to_string_alt arch) ~description: @@ -1563,8 +1598,8 @@ module Images = struct "${rust_sdk_bindings_image_name}:${rust_sdk_bindings_image_tag}" in Image.mk_internal - ~image_builder_amd64:(image_builder Amd64) - ~image_builder_arm64:(image_builder Arm64) + ~image_builder_amd64:(image_builder Amd64 ()) + ~image_builder_arm64:(image_builder Arm64 ~storage:Ramfs ()) ~image_path () @@ -1587,7 +1622,7 @@ module Images = struct module CI = struct (* The job that builds the CI images. This job is automatically included in any pipeline that uses any of these images. *) - let job_docker_ci arch = + let job_docker_ci arch ?storage () = let variables = Some [("ARCH", arch_to_string_alt arch)] in let retry = match arch with @@ -1601,6 +1636,7 @@ module Images = struct ?retry ~__POS__ ~arch + ?storage ~skip_docker_initialization:true ~stage ~timeout:(Minutes 90) @@ -1613,8 +1649,8 @@ module Images = struct let mk_ci_image ~image_path = Image.mk_internal - ~image_builder_amd64:(job_docker_ci Amd64) - ~image_builder_arm64:(job_docker_ci Arm64) + ~image_builder_amd64:(job_docker_ci Amd64 ()) + ~image_builder_arm64:(job_docker_ci Arm64 ~storage:Ramfs ()) ~image_path () diff --git a/ci/lib_tezos_ci/tezos_ci.mli b/ci/lib_tezos_ci/tezos_ci.mli index b8f981e2890d..4e345aaa5b6c 100644 --- a/ci/lib_tezos_ci/tezos_ci.mli +++ b/ci/lib_tezos_ci/tezos_ci.mli @@ -304,6 +304,10 @@ type tag = (** GCP prod AMD64 runner, suitable for jobs needing very high CPU. *) | Gcp_very_high_cpu_dev (** GCP dev AMD64 runner, suitable for jobs needing very high CPU. *) + | Gcp_very_high_cpu_ramfs + (** GCP prod AMD64 runner, suitable for jobs needing very high CPU and RAMFS. *) + | Gcp_very_high_cpu_ramfs_dev + (** GCP dev AMD64 runner, suitable for jobs needing very high CPU and RAMFS. *) | Aws_specific (** AWS runners, in cases where a CI is legacy or not suitable for GCP. *) | Dynamic @@ -381,6 +385,10 @@ type cpu = | High (** Target GCP high runner pool. *) | Very_high (** Target GCP very high runner pool. *) +type storage = + | Network (** Target default storage runner pool. *) + | Ramfs (** Target ramfs storage runner pool. *) + (** Define a job. This smart constructor for {!Gitlab_ci.Types.job} additionally: @@ -437,6 +445,7 @@ val job : ?timeout:Gitlab_ci.Types.time_interval -> ?tag:tag -> ?cpu:cpu -> + ?storage:storage -> ?git_strategy:git_strategy -> ?coverage:string -> ?retry:Gitlab_ci.Types.retry -> @@ -536,6 +545,7 @@ val job_docker_authenticated : ?dependencies:dependencies -> ?image_dependencies:Image.t list -> ?arch:arch -> + ?storage:storage -> ?tag:tag -> ?allow_failure:Gitlab_ci.Types.allow_failure_job -> ?parallel:Gitlab_ci.Types.parallel -> @@ -654,7 +664,7 @@ module Images : sig val jsonnet : Image.t module CI : sig - val job_docker_ci : arch -> tezos_job + val job_docker_ci : arch -> ?storage:storage -> unit -> tezos_job val runtime : Image.t diff --git a/teztale/ci/common.ml b/teztale/ci/common.ml index d0cdb1cdab65..d9250f1d8d3a 100644 --- a/teztale/ci/common.ml +++ b/teztale/ci/common.ml @@ -14,12 +14,13 @@ let changeset = Changeset.(make ["teztale/**/*"]) (** Job that builds the Teztale executables *) let job_build ?rules ?(expire_in = Gitlab_ci.Types.(Duration (Days 1))) ?cpu - ~arch () = + ~arch ?storage () = let arch_string = arch_to_string arch in job ~__POS__ ~arch ?cpu + ?storage ~name:("teztale.build:static-" ^ arch_string) ~image:Images.CI.build ~stage:Stages.build diff --git a/teztale/ci/release.ml b/teztale/ci/release.ml index 88bdd82eb73b..4c3385c3b84a 100644 --- a/teztale/ci/release.ml +++ b/teztale/ci/release.ml @@ -31,7 +31,7 @@ let job_gitlab_release = (Dependent [ Artifacts (Common.job_build ~arch:Amd64 ()); - Artifacts (Common.job_build ~arch:Arm64 ()); + Artifacts (Common.job_build ~arch:Arm64 ~storage:Ramfs ()); ]) ~name:"gitlab:release" ["./teztale/scripts/releases/create_gitlab_release.sh"] @@ -53,7 +53,8 @@ let job_release_page ~test () = (Dependent [ Artifacts (Common.job_build ~expire_in:Never ~arch:Amd64 ()); - Artifacts (Common.job_build ~expire_in:Never ~arch:Arm64 ()); + Artifacts + (Common.job_build ~expire_in:Never ~arch:Arm64 ~storage:Ramfs ()); ]) ~variables: (if test then @@ -77,7 +78,7 @@ let jobs ~test () = (if test then [] else [job_datadog_pipeline_trace]) @ [ Common.job_build ~expire_in:Never ~arch:Amd64 (); - Common.job_build ~expire_in:Never ~arch:Arm64 (); + Common.job_build ~expire_in:Never ~arch:Arm64 ~storage:Ramfs (); job_gitlab_release; job_release_page ~test (); ] -- GitLab