From 7e8787382067db8bb62ce639c71215acf66e8c0d Mon Sep 17 00:00:00 2001 From: Neo <11726174-neo.nomadic@users.noreply.gitlab.com> Date: Fri, 7 Feb 2025 19:12:41 +0100 Subject: [PATCH 1/2] CI: reduce pipeline walltime, adding gcp_high_cpu gitlab tag --- .gitlab/ci/pipelines/before_merging.yml | 12 ++++---- .gitlab/ci/pipelines/merge_train.yml | 12 ++++---- .../ci/pipelines/schedule_extended_test.yml | 12 ++++---- ci/bin/code_verification.ml | 11 ++++++-- ci/bin/common.ml | 9 ++++-- ci/bin/master_branch.ml | 7 ++++- ci/lib_tezos_ci/tezos_ci.ml | 28 +++++++++++++------ ci/lib_tezos_ci/tezos_ci.mli | 11 +++++++- 8 files changed, 68 insertions(+), 34 deletions(-) diff --git a/.gitlab/ci/pipelines/before_merging.yml b/.gitlab/ci/pipelines/before_merging.yml index 0294125823f7..ca90a257a983 100644 --- a/.gitlab/ci/pipelines/before_merging.yml +++ b/.gitlab/ci/pipelines/before_merging.yml @@ -585,7 +585,7 @@ oc.build_x86_64-released: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -687,7 +687,7 @@ oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -860,7 +860,7 @@ ocaml-check: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - '**/*.ml' @@ -3519,7 +3519,7 @@ oc.unit:other-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -3605,7 +3605,7 @@ oc.unit:proto-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -3812,7 +3812,7 @@ oc.unit:protocol_compiles: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml diff --git a/.gitlab/ci/pipelines/merge_train.yml b/.gitlab/ci/pipelines/merge_train.yml index 
230ab8f1788d..dda0833a3070 100644 --- a/.gitlab/ci/pipelines/merge_train.yml +++ b/.gitlab/ci/pipelines/merge_train.yml @@ -584,7 +584,7 @@ oc.build_x86_64-released: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -686,7 +686,7 @@ oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -859,7 +859,7 @@ ocaml-check: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - changes: - '**/*.ml' @@ -3518,7 +3518,7 @@ oc.unit:other-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -3604,7 +3604,7 @@ oc.unit:proto-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml @@ -3811,7 +3811,7 @@ oc.unit:protocol_compiles: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - changes: - .gitlab-ci.yml diff --git a/.gitlab/ci/pipelines/schedule_extended_test.yml b/.gitlab/ci/pipelines/schedule_extended_test.yml index fdddc38d558a..8d08c7514dc4 100644 --- a/.gitlab/ci/pipelines/schedule_extended_test.yml +++ b/.gitlab/ci/pipelines/schedule_extended_test.yml @@ -431,7 +431,7 @@ oc.build_x86_64-released: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - when: always dependencies: @@ -485,7 +485,7 @@ oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - when: always dependencies: @@ -584,7 +584,7 @@ ocaml-check: image: ${ci_image_name}/build:${ci_image_tag} stage: build tags: - - gcp + - gcp_high_cpu rules: - when: always dependencies: @@ -2418,7 +2418,7 @@ oc.unit:other-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test 
tags: - - gcp + - gcp_high_cpu rules: - when: on_success needs: @@ -2479,7 +2479,7 @@ oc.unit:proto-x86_64: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - when: on_success needs: @@ -2611,7 +2611,7 @@ oc.unit:protocol_compiles: image: ${ci_image_name}/build:${ci_image_tag} stage: test tags: - - gcp + - gcp_high_cpu rules: - when: on_success needs: diff --git a/ci/bin/code_verification.ml b/ci/bin/code_verification.ml index 3cb6575ba981..66ef547d0f47 100644 --- a/ci/bin/code_verification.ml +++ b/ci/bin/code_verification.ml @@ -587,6 +587,7 @@ let jobs pipeline_type = job_build_dynamic_binaries ~__POS__ ~arch:Amd64 + ~high_cpu:true ~dependencies:dependencies_needs_start ~release:true ~rules:(make_rules ~changes:changeset_octez ()) @@ -602,6 +603,7 @@ let jobs pipeline_type = job_build_dynamic_binaries ~__POS__ ~arch:Amd64 + ~high_cpu:true ~dependencies:dependencies_needs_start ~release:false ~rules:(make_rules ~changes:changeset_octez ()) @@ -690,6 +692,7 @@ let jobs pipeline_type = job ~__POS__ ~name:"ocaml-check" + ~high_cpu:true ~image:Images.CI.build ~stage ~dependencies:dependencies_needs_start @@ -959,8 +962,8 @@ let jobs pipeline_type = make_rules ~changes:changeset_octez ~dependent:true () in let job_unit_test ~__POS__ ?(image = Images.CI.build) ?timeout - ?parallel_vector ?(rules = rules) ~arch ~name ~make_targets () : - tezos_job = + ?parallel_vector ?(rules = rules) ~arch ?(high_cpu = false) ~name + ~make_targets () : tezos_job = let arch_string = arch_to_string arch in let script = ["make $MAKE_TARGETS"] in let dependencies = build_dependencies arch in @@ -993,6 +996,7 @@ let jobs pipeline_type = ~stage:Stages.test ~image ~arch + ~high_cpu ~dependencies ~rules ~variables @@ -1038,6 +1042,7 @@ let jobs pipeline_type = ~__POS__ ~name:"oc.unit:other-x86_64" ~arch:Amd64 + ~high_cpu:true ~make_targets:["test-other-unit"] () |> enable_coverage_instrumentation |> enable_coverage_output_artifact @@ -1048,6 
+1053,7 @@ let jobs pipeline_type = ~__POS__ ~name:"oc.unit:proto-x86_64" ~arch:Amd64 + ~high_cpu:true ~make_targets:["test-proto-unit"] () |> enable_coverage_instrumentation |> enable_coverage_output_artifact @@ -1087,6 +1093,7 @@ let jobs pipeline_type = ~__POS__ ~name:"oc.unit:protocol_compiles" ~arch:Amd64 + ~high_cpu:true ~image:Images.CI.build ~stage:Stages.test ~dependencies:(build_dependencies Amd64) diff --git a/ci/bin/common.ml b/ci/bin/common.ml index ba25bb8d7cc6..308246bf5466 100644 --- a/ci/bin/common.ml +++ b/ci/bin/common.ml @@ -638,7 +638,7 @@ let changeset_mir_tzt = (no need to test that we pass the -static flag twice) - released variants exist, that are used in release tag pipelines (they do not build experimental executables) *) -let job_build_static_binaries ~__POS__ ~arch +let job_build_static_binaries ~__POS__ ~arch ?(high_cpu = false) ?(executable_files = "script-inputs/released-executables") ?version_executable ?(release = false) ?rules ?dependencies () : tezos_job = let arch_string = arch_to_string arch in @@ -663,6 +663,7 @@ let job_build_static_binaries ~__POS__ ~arch ~__POS__ ~stage:Stages.build ~arch + ~high_cpu ~name ~image:Images.CI.build ~before_script:(before_script ~take_ownership:true ~eval_opam:true []) @@ -869,8 +870,8 @@ let job_build_rpm_amd64 : unit -> tezos_job = ~arch:Amd64 ~dependencies:(Dependent []) -let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?(release = false) - ?dependencies () = +let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?retry ?high_cpu + ?(release = false) ?dependencies () = let arch_string = arch_to_string arch in let name = sf @@ -930,6 +931,8 @@ let job_build_dynamic_binaries ?rules ~__POS__ ~arch ?(release = false) ~__POS__ ~stage:Stages.build ~arch + ?retry + ?high_cpu ~name ~image:Images.CI.build ~before_script: diff --git a/ci/bin/master_branch.ml b/ci/bin/master_branch.ml index d19bd8c807e1..4655815f588f 100644 --- a/ci/bin/master_branch.ml +++ b/ci/bin/master_branch.ml @@ -53,7 +53,12 
@@ let jobs = job_build_static_binaries ~__POS__ ~arch:Arm64 ~rules:rules_always () in let job_static_x86_64 = - job_build_static_binaries ~__POS__ ~arch:Amd64 ~rules:rules_always () + job_build_static_binaries + ~__POS__ + ~arch:Amd64 + ~high_cpu:false + ~rules:rules_always + () in let job_unified_coverage_default : tezos_job = job diff --git a/ci/lib_tezos_ci/tezos_ci.ml b/ci/lib_tezos_ci/tezos_ci.ml index 61e390d9fc1e..7ccf8c48229e 100644 --- a/ci/lib_tezos_ci/tezos_ci.ml +++ b/ci/lib_tezos_ci/tezos_ci.ml @@ -737,6 +737,8 @@ type tag = | Gcp_tezt_memory_3k_dev | Gcp_tezt_memory_4k | Gcp_tezt_memory_4k_dev + | Gcp_high_cpu + | Gcp_high_cpu_dev | Aws_specific | Dynamic @@ -751,6 +753,8 @@ let string_of_tag = function | Gcp_tezt_memory_3k_dev -> "gcp_tezt_memory_3k_dev" | Gcp_tezt_memory_4k -> "gcp_tezt_memory_4k" | Gcp_tezt_memory_4k_dev -> "gcp_tezt_memory_4k_dev" + | Gcp_high_cpu -> "gcp_high_cpu" + | Gcp_high_cpu_dev -> "gcp_high_cpu_dev" | Aws_specific -> "aws_specific" | Dynamic -> Gitlab_ci.Var.encode dynamic_tag_var @@ -759,7 +763,7 @@ let arch_of_tag = function | Gcp_arm64 | Gcp_dev_arm64 -> Some Arm64 | Gcp | Gcp_dev | Gcp_tezt | Gcp_tezt_dev | Gcp_tezt_memory_3k | Gcp_tezt_memory_3k_dev | Gcp_tezt_memory_4k | Gcp_tezt_memory_4k_dev - | Aws_specific -> + | Gcp_high_cpu | Gcp_high_cpu_dev | Aws_specific -> Some Amd64 | Dynamic -> None @@ -823,18 +827,24 @@ let enc_git_strategy = function let job ?arch ?after_script ?allow_failure ?artifacts ?before_script ?cache ?id_tokens ?interruptible ?(dependencies = Staged []) ?(image_dependencies = []) ?services ?variables ?rules - ?(timeout = Gitlab_ci.Types.Minutes 60) ?tag ?git_strategy ?coverage ?retry - ?parallel ?description ~__POS__ ?image ?template ~stage ~name script : - tezos_job = + ?(timeout = Gitlab_ci.Types.Minutes 60) ?tag ?(high_cpu = false) + ?git_strategy ?coverage ?retry ?parallel ?description ~__POS__ ?image + ?template ~stage ~name script : tezos_job = (* The tezos/tezos CI uses singleton tags 
for its runners. *) let tag = - match (arch, tag) with - | Some arch, None -> ( match arch with Amd64 -> Gcp | Arm64 -> Gcp_arm64) - | None, Some tag -> tag - | None, None -> + match (arch, tag, high_cpu) with + | Some _, Some _, true + | None, Some _, true + | Some _, None, true + | None, None, true -> + Gcp_high_cpu + | Some arch, None, false -> ( + match arch with Amd64 -> Gcp | Arm64 -> Gcp_arm64) + | None, Some tag, false -> tag + | None, None, false -> (* By default, we assume Amd64 runners as given by the [gcp] tag. *) Gcp - | Some _, Some _ -> + | Some _, Some _, false -> failwith "[job] cannot specify both [arch] and [tags] at the same time in job \ '%s'." diff --git a/ci/lib_tezos_ci/tezos_ci.mli b/ci/lib_tezos_ci/tezos_ci.mli index fd6244b0c52f..b734a7dd7cc2 100644 --- a/ci/lib_tezos_ci/tezos_ci.mli +++ b/ci/lib_tezos_ci/tezos_ci.mli @@ -259,6 +259,10 @@ type tag = (** GCP prod AMD64 runner, suitable for tezt memory 4k jobs (more RAM and CPU) *) | Gcp_tezt_memory_4k_dev (** GCP dev AMD64 runner, suitable for tezt memory 4k jobs (more RAM and CPU) *) + | Gcp_high_cpu + (** GCP prod AMD64 runner, suitable for jobs needing high CPU. *) + | Gcp_high_cpu_dev + (** GCP dev AMD64 runner, suitable for jobs needing high CPU. *) | Aws_specific (** AWS runners, in cases where a CI is legacy or not suitable for GCP. *) | Dynamic @@ -360,7 +364,11 @@ val enc_git_strategy : git_strategy -> string - If the [image] used is {!Internal} and [tag] is set to {!Dynamic} then a run-time error is generated as the required architecture for the internal image cannot be statically - deduced. *) + deduced. + + - The [high_cpu] parameter schedules the job on a GCP GitLab runner (tag [gcp_high_cpu]) with a 1:1 ratio between CPU and RAM; when it is [true], it takes precedence over [arch] and [tag]. + For more information, see {{:https://gcloud-compute.com/e2-highcpu-16.html} e2-highcpu-16}. 
*) + val job : ?arch:arch -> ?after_script:string list -> @@ -377,6 +385,7 @@ val job : ?rules:Gitlab_ci.Types.job_rule list -> ?timeout:Gitlab_ci.Types.time_interval -> ?tag:tag -> + ?high_cpu:bool -> ?git_strategy:git_strategy -> ?coverage:string -> ?retry:Gitlab_ci.Types.retry -> -- GitLab From c9edbc0a55ea86a2a052bd461f749a93f7479136 Mon Sep 17 00:00:00 2001 From: Neo <11726174-neo.nomadic@users.noreply.gitlab.com> Date: Fri, 7 Feb 2025 19:15:22 +0100 Subject: [PATCH 2/2] CI: reduce pipeline walltime, adding retry on gcp_high_cpu jobs --- .gitlab/ci/pipelines/before_merging.yml | 20 +++++++++++++++++++ .gitlab/ci/pipelines/merge_train.yml | 20 +++++++++++++++++++ .../ci/pipelines/schedule_extended_test.yml | 20 +++++++++++++++++++ ci/bin/code_verification.ml | 8 ++++++++ 4 files changed, 68 insertions(+) diff --git a/.gitlab/ci/pipelines/before_merging.yml b/.gitlab/ci/pipelines/before_merging.yml index ca90a257a983..1519f0d4690a 100644 --- a/.gitlab/ci/pipelines/before_merging.yml +++ b/.gitlab/ci/pipelines/before_merging.yml @@ -682,6 +682,11 @@ oc.build_x86_64-released: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} @@ -795,6 +800,11 @@ oc.build_x86_64-exp-dev-extra: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure wasm-runtime-check: image: ${ci_image_name}/build:${ci_image_tag} @@ -928,6 +938,11 @@ ocaml-check: DUNE_CACHE: enabled DUNE_CACHE_STORAGE_MODE: hardlink DUNE_CACHE_ROOT: $CI_PROJECT_DIR/_dune_cache + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_kernels: image: ${rust_toolchain_image_name}:${rust_toolchain_image_tag} @@ 
-3869,6 +3884,11 @@ oc.unit:protocol_compiles: CARGO_NET_OFFLINE: "false" SCCACHE_DIR: $CI_PROJECT_DIR/_sccache SCCACHE_CACHE_SIZE: 5G + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure de.unit:x86_64: image: ${ci_image_name}/test:${ci_image_tag} diff --git a/.gitlab/ci/pipelines/merge_train.yml b/.gitlab/ci/pipelines/merge_train.yml index dda0833a3070..c24c86f54d75 100644 --- a/.gitlab/ci/pipelines/merge_train.yml +++ b/.gitlab/ci/pipelines/merge_train.yml @@ -681,6 +681,11 @@ oc.build_x86_64-released: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} @@ -794,6 +799,11 @@ oc.build_x86_64-exp-dev-extra: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure wasm-runtime-check: image: ${ci_image_name}/build:${ci_image_tag} @@ -927,6 +937,11 @@ ocaml-check: DUNE_CACHE: enabled DUNE_CACHE_STORAGE_MODE: hardlink DUNE_CACHE_ROOT: $CI_PROJECT_DIR/_dune_cache + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_kernels: image: ${rust_toolchain_image_name}:${rust_toolchain_image_tag} @@ -3868,6 +3883,11 @@ oc.unit:protocol_compiles: CARGO_NET_OFFLINE: "false" SCCACHE_DIR: $CI_PROJECT_DIR/_sccache SCCACHE_CACHE_SIZE: 5G + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure de.unit:x86_64: image: ${ci_image_name}/test:${ci_image_tag} diff --git a/.gitlab/ci/pipelines/schedule_extended_test.yml b/.gitlab/ci/pipelines/schedule_extended_test.yml index 8d08c7514dc4..fd7480f43b8a 100644 --- a/.gitlab/ci/pipelines/schedule_extended_test.yml +++ b/.gitlab/ci/pipelines/schedule_extended_test.yml @@ -480,6 +480,11 @@ 
oc.build_x86_64-released: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_x86_64-exp-dev-extra: image: ${ci_image_name}/build:${ci_image_tag} @@ -545,6 +550,11 @@ oc.build_x86_64-exp-dev-extra: - _build/default/contrib/octez_injector_server/octez_injector_server.exe - etherlink-governance-observer when: on_success + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure wasm-runtime-check: image: ${ci_image_name}/build:${ci_image_tag} @@ -622,6 +632,11 @@ ocaml-check: DUNE_CACHE: enabled DUNE_CACHE_STORAGE_MODE: hardlink DUNE_CACHE_ROOT: $CI_PROJECT_DIR/_dune_cache + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure oc.build_kernels: image: ${rust_toolchain_image_name}:${rust_toolchain_image_tag} @@ -2643,6 +2658,11 @@ oc.unit:protocol_compiles: CARGO_NET_OFFLINE: "false" SCCACHE_DIR: $CI_PROJECT_DIR/_sccache SCCACHE_CACHE_SIZE: 5G + retry: + max: 2 + when: + - stuck_or_timeout_failure + - runner_system_failure de.unit:x86_64: image: ${ci_image_name}/test:${ci_image_tag} diff --git a/ci/bin/code_verification.ml b/ci/bin/code_verification.ml index 66ef547d0f47..c57608f91fae 100644 --- a/ci/bin/code_verification.ml +++ b/ci/bin/code_verification.ml @@ -588,6 +588,8 @@ let jobs pipeline_type = ~__POS__ ~arch:Amd64 ~high_cpu:true + ~retry: + {max = 2; when_ = [Stuck_or_timeout_failure; Runner_system_failure]} ~dependencies:dependencies_needs_start ~release:true ~rules:(make_rules ~changes:changeset_octez ()) @@ -604,6 +606,8 @@ let jobs pipeline_type = ~__POS__ ~arch:Amd64 ~high_cpu:true + ~retry: + {max = 2; when_ = [Stuck_or_timeout_failure; Runner_system_failure]} ~dependencies:dependencies_needs_start ~release:false ~rules:(make_rules ~changes:changeset_octez ()) @@ -695,6 +699,8 @@ let jobs pipeline_type = ~high_cpu:true ~image:Images.CI.build ~stage 
+ ~retry: + {max = 2; when_ = [Stuck_or_timeout_failure; Runner_system_failure]} ~dependencies:dependencies_needs_start ~rules:(make_rules ~changes:changeset_ocaml_check_files ()) ~before_script: @@ -1094,6 +1100,8 @@ let jobs pipeline_type = ~name:"oc.unit:protocol_compiles" ~arch:Amd64 ~high_cpu:true + ~retry: + {max = 2; when_ = [Stuck_or_timeout_failure; Runner_system_failure]} ~image:Images.CI.build ~stage:Stages.test ~dependencies:(build_dependencies Amd64) -- GitLab