NOTE(review): the deployement.ml hunks below were amended by hand after this
patch was generated (VM probe moved first, container wait restricted to the
non-debian branch, duplicated `docker tag` step removed, comments fixed).
The post-image blob id on that file's `index` line is therefore stale, and
indentation was reconstructed: apply with `git apply --ignore-whitespace`.

diff --git a/tezt/lib_cloud/deployement.ml b/tezt/lib_cloud/deployement.ml
index ca570ba8558fd19acc39355cc6304362980d9baf..81096e547fe040cf02629a4c750e1db335459c4c 100644
--- a/tezt/lib_cloud/deployement.ml
+++ b/tezt/lib_cloud/deployement.ml
@@ -11,42 +11,31 @@
 module Remote = struct
   type t = {agents : Agent.t list}
 
-  let wait_docker_running ~vm_name () =
-    let ssh_private_key_filename = Env.ssh_private_key_filename () in
-    let* zone = Env.zone () in
-    let is_ready _output = true in
-    let run () =
-      (* Try to get the docker images up. *)
-      Gcloud.compute_ssh
-        ~zone
-        ~vm_name
-        ~ssh_private_key_filename
-        "docker"
-        ["ps"; "--format"; "{{.Names}}"]
-    in
-    let* output = Env.wait_process ~is_ready ~run () in
-    let images_name =
-      output |> String.split_on_char '\n' |> List.filter (fun s -> s <> "")
-    in
+  (** Wait (through [Env.wait_process]) until [docker inspect] reports the
+      container [container_name] as running on the host of [agent]. *)
+  let wait_docker_running agent ~container_name =
     let is_ready output = String.trim output |> Stdlib.bool_of_string in
     let run image_name () =
       (* Try to get the docker images that are actually running. *)
-      Gcloud.compute_ssh
-        ~zone
-        ~vm_name
-        ~ssh_private_key_filename
+      Agent.host_run_command
+        agent
         "docker"
         ["inspect"; "--format"; "{{.State.Running}}"; image_name]
     in
-    let* _ =
-      images_name
-      |> List.map (fun image_name ->
-             Env.wait_process ~is_ready ~run:(run image_name) ())
-      |> Lwt.all
-    in
+    let* _ = Env.wait_process ~is_ready ~run:(run container_name) () in
     Lwt.return_unit
 
-  let workspace_deploy ~workspace_name ~number_of_vms ~configuration =
+  (** Wait until a command can be run on the host of [agent]. *)
+  let wait_vm_running agent =
+    let is_ready _output = true in
+    let run () =
+      (* Run a trivial command to check that the VM is reachable. *)
+      Agent.host_run_command agent "echo" ["check"]
+    in
+    Env.wait_process ~is_ready ~run ()
+
+  let workspace_deploy ?(docker_args = []) ~workspace_name ~number_of_vms
+      ~configuration () =
     let* () = Terraform.VM.Workspace.select workspace_name in
     let* docker_image =
       Env.uri_of_docker_image configuration.Configuration.docker_image
@@ -75,12 +64,6 @@ module Remote = struct
              Format.asprintf "%s-%03d" workspace_name (i + 1))
     in
     let* zone = Env.zone () in
-    let* () =
-      if configuration.os = "cos" then
-        List.map (fun vm_name -> wait_docker_running ~vm_name ()) names
-        |> Lwt.join
-      else Lwt.return_unit
-    in
     let ssh_private_key_filename = Env.ssh_private_key_filename () in
     let make_agent vm_name =
       let* ip = Gcloud.get_ip_address_from_name ~zone vm_name in
@@ -109,6 +92,80 @@ module Remote = struct
       |> Lwt.return
     in
     let* agents = names |> Lwt_list.map_p make_agent in
+    let* () =
+      agents
+      |> List.map (fun agent ->
+             (* Make sure the VM accepts commands before configuring it. *)
+             let* _ = wait_vm_running agent in
+             let* () =
+               Agent.host_run_command
+                 agent
+                 "docker-credential-gcr"
+                 [
+                   "configure-docker";
+                   "--registries";
+                   "europe-west1-docker.pkg.dev";
+                 ]
+               |> Process.check
+             in
+             let configuration = Agent.configuration agent in
+             let* docker_image =
+               Env.uri_of_docker_image configuration.docker_image
+             in
+             let container_name = Env.tezt_cloud in
+             (* If the user configured debian, we don't run the
+                docker image on it. *)
+             if configuration.os = "debian" then Lwt.return_unit
+             else
+               let* _ =
+                 Agent.host_run_command agent "docker" ["pull"; docker_image]
+                 |> Process.check
+               in
+               (* Tag the image with a short name: this is easier to use. *)
+               let* _ =
+                 Agent.host_run_command
+                   agent
+                   "docker"
+                   ["tag"; docker_image; container_name]
+                 |> Process.check
+               in
+               (* Exit status ignored: presumably no container may be
+                  running yet. *)
+               let* _ =
+                 Agent.host_run_command agent "docker" ["kill"; container_name]
+                 |> Process.wait
+               in
+               let* _ =
+                 Agent.host_run_command
+                   agent
+                   "docker"
+                   ([
+                      "run";
+                      "-d";
+                      "--rm";
+                      "--name";
+                      container_name;
+                      "-p";
+                      Format.asprintf
+                        "%d-%d:%d-%d"
+                        base_port
+                        (base_port + ports_per_vm)
+                        base_port
+                        (base_port + ports_per_vm);
+                    ]
+                   @ docker_args @ [docker_image])
+                 |> Process.wait
+               in
+               let* _ =
+                 Agent.host_run_command
+                   agent
+                   "sudo"
+                   ["iptables"; "-A"; "INPUT"; "-p"; "tcp"; "-j"; "ACCEPT"]
+                 |> Process.wait
+               in
+               wait_docker_running agent ~container_name)
+      |> Lwt.join
+    in
     Lwt.return agents
 
   let order_agents agents configurations =
@@ -133,8 +190,40 @@ module Remote = struct
     let configuration = Configuration.make () in
     let tezt_cloud = Env.tezt_cloud in
     let* () = Terraform.VM.Workspace.init ~tezt_cloud [workspace_name] in
+    let docker_args =
+      [
+        (* This is because docker in docker requires some root
+           capabilities, in particular for communicating via the
+           docker socket. *)
+        "--privileged";
+        (* This mount is also for docker in docker that requires
+           access to the docker socket. *)
+        "-v";
+        "/var/run/docker.sock:/var/run/docker.sock";
+        (* This is hopefully temporary. This is because
+           prometheus/grafana ports need to be accessed. In the
+           future, if we had a prometheus/grafana configuration we
+           could read from it and know the port directly. Right now,
+           this is a bit messy. *)
+        "--network";
+        "host";
+      ]
+      @ (if Env.website then ["-v"; "/tmp/website:/tmp/website"] else [])
+      @ (if Env.prometheus then ["-v"; "/tmp/prometheus:/tmp/prometheus"]
+         else [])
+      @ (if Env.grafana then ["-v"; "/tmp/grafana:/tmp/grafana"] else [])
+      (* Fixme: a boolean is missing to know when alert manager is running. *)
+      @ (if true then ["-v"; "/tmp/alert_manager:/tmp/alert_manager"] else [])
+      @ if Env.open_telemetry then ["-v"; "/tmp/otel:/tmp/otel"] else []
+    in
+
     let* agents =
-      workspace_deploy ~workspace_name ~configuration ~number_of_vms:1
+      workspace_deploy
+        ~docker_args
+        ~workspace_name
+        ~configuration
+        ~number_of_vms:1
+        ()
     in
     match agents with [agent] -> Lwt.return agent | _ -> assert false
 
@@ -199,7 +288,7 @@ module Remote = struct
     let* () = Terraform.VM.Workspace.select workspace_name in
     let* () = Terraform.VM.init () in
     let* agents =
-      workspace_deploy ~workspace_name ~number_of_vms ~configuration
+      workspace_deploy ~workspace_name ~number_of_vms ~configuration ()
     in
     agents
     |> List.iter (fun agent ->
diff --git a/tezt/lib_cloud/jobs.ml b/tezt/lib_cloud/jobs.ml
index 6d4978bd40150c7f3a711e7a595b19ce87c84442..45198f69a2c15eacf5901f5a20a22a452cb74377 100644
--- a/tezt/lib_cloud/jobs.ml
+++ b/tezt/lib_cloud/jobs.ml
@@ -124,27 +124,10 @@ let clean_up_vms () =
                     String.split_on_char '\n' output
                     |> List.filter (fun str -> str <> "")
                   in
-                  let is_main_image image_name =
-                    (* The main image created by Terraform at the
-                       moment contains "--" in its name. This enables
-                       to identify this image uniquely. While this is
-                       not very robust, it should work for now. *)
-                    let re = Str.regexp_string "--" in
-                    try
-                      ignore (Str.search_forward re image_name 0) ;
-                      true
-                    with Not_found -> false
+                  let main_image = Env.tezt_cloud in
+                  let other_images =
+                    List.filter (fun image -> image <> main_image) images_name
                   in
-                  let main_images, other_images =
-                    List.partition is_main_image images_name
-                  in
-                  if List.length main_images <> 1 then
-                    Test.fail
-                      "Unexpected setting. All the docker images found: %s. \
-                       There should only be one image which contains '--' in \
-                       the list"
-                      (String.concat ";" images_name) ;
-                  let main_image = List.hd main_images in
                   let* _ =
                     Gcloud.compute_ssh
                       ~zone
diff --git a/tezt/lib_cloud/terraform/vm/main.tf b/tezt/lib_cloud/terraform/vm/main.tf
index 98822d1ee5fcc763e0fb1fdc55cc3a3a2f3630dc..1267cbe9c49393d7bd381797b49547576ce5f990 100644
--- a/tezt/lib_cloud/terraform/vm/main.tf
+++ b/tezt/lib_cloud/terraform/vm/main.tf
@@ -105,7 +105,7 @@ provider "google" {
 # A service account must be associated with a VM
 resource "google_service_account" "default" {
   account_id   = "${terraform.workspace}-id"
-  display_name = "${terraform.workspace}"
+  display_name = terraform.workspace
 }
 
 # We want the service account to be able to fetch docker image from
@@ -116,101 +116,6 @@ resource "google_project_iam_member" "artifact_registry_reader" {
   member  = "serviceAccount:${google_service_account.default.email}"
 }
 
-# This is an helper that enables to run the docker image once the
-# machine is up
-module "gce-container" {
-  source  = "terraform-google-modules/container-vm/google"
-  version = "~> 3.0"
-
-  container = { image = "${var.docker_image}"
-
-    # This can be useful to execute some processes from the docker containers
-    # that requires some capabalities on the VM
-    securityContext = {
-      privileged = true
-    }
-
-    # Volume settings is only necessary for the proxy VM
-    volumeMounts = [
-      {
-        # Using the proxy mode, this is necessary if the docker image runs another docker image
-        mountPath = "/var/run/docker.sock"
-        name      = "docker-socket"
-        readOnly  = false
-      },
-      {
-        # Necessary to provide access from the image docker to the website
-        mountPath = "/tmp/website"
-        name      = "website"
-        readOnly  = false
-      },
-      {
-        # Same for Prometheus
-        mountPath = "/tmp/prometheus"
-        name      = "prometheus"
-        readOnly  = false
-      },
-      {
-        # Same for Grafana
-        mountPath = "/tmp/grafana"
-        name      = "grafana"
-        readOnly  = false
-      },
-      {
-        # Same for Alert manager
-        mountPath = "/tmp/alert_manager"
-        name      = "alert-manager"
-        readOnly  = false
-      },
-      {
-        # Same for OpenTelemetry
-        mountPath = "/tmp/otel"
-        name      = "otel"
-        readOnly  = false
-      }
-    ]
-  }
-
-  volumes = [
-    {
-      name = "docker-socket"
-      hostPath = {
-        path = "/var/run/docker.sock"
-      }
-    },
-    {
-      name = "website"
-      hostPath = {
-        path = "/tmp/website"
-      }
-    },
-    {
-      name = "prometheus"
-      hostPath = {
-        path = "/tmp/prometheus"
-      }
-    },
-    {
-      name = "alert-manager"
-      hostPath = {
-        path = "/tmp/alert_manager"
-      }
-    },
-    {
-      name = "otel"
-      hostPath = {
-        path = "/tmp/otel"
-      }
-    },
-    {
-      name = "grafana"
-      hostPath = {
-        path = "/tmp/grafana"
-      }
-    }
-  ]
-}
-
 # When running a VM, it must be associated with a Virtual Private
 # Cloud (VPC). A VPC is made of subnetworks (generally per region).
 # For this experiments, we want the VM to be reached from the internet
@@ -277,7 +182,7 @@ resource "google_compute_firewall" "default" {
   # 16686 Provides access to the Jaeger web UI for tracing visualization.
   allow {
     protocol = "tcp"
-    ports    = ["4317", "14250", "16686","55681"]
+    ports    = ["4317", "14250", "16686", "55681"]
   }
 
   # Rule to enable static page web access
@@ -328,11 +233,6 @@ resource "google_compute_instance_template" "default" {
     scopes = ["cloud-platform"]
   }
 
-  # This declares the docker image that must be run when the machine is up
-  metadata = {
-    gce-container-declaration = module.gce-container.metadata_value
-  }
-
   # We register the subnetwork configuration
   network_interface {
     subnetwork = google_compute_subnetwork.default.self_link