diff --git a/tezt/lib_cloud/README.md b/tezt/lib_cloud/README.md index 97fc8a1b3523c1c9b1046d04dd582ffcba8341dd..8ee8a72b76283eed9e73449173ed16036e5a8a7c 100644 --- a/tezt/lib_cloud/README.md +++ b/tezt/lib_cloud/README.md @@ -53,7 +53,7 @@ Run the following commands: 7. (optional) `gcloud auth configure-docker europe-west1-docker.pkg.dev`: Authorize the artifact registry used by this project. -Please set an environment variable `TF_WORKSPACE` which will be used to identify +Please set an environment variable `TEZT_CLOUD` which will be used to identify the resources you deploy onto the cloud. The value should be unique for the given project. We recommend using a similar username as the one you use on Gitlab. @@ -97,7 +97,7 @@ below. Terraform maintains a state of the deployed resources that are *owned* by `terraform`. A standard way to store them is to use a GCP bucket. -Once your `TF_WORKSPACE` value is set and you have the appropriate +Once your `TEZT_CLOUD` value is set and you have the appropriate rights, you should be able to create the bucket using: ```bash @@ -109,7 +109,7 @@ dune exec tezt/tests/main.exe -- terraform state deploy bucket -v A docker registry is needed so that, during initialisation of the VMs, the docker image can be pulled from the cloud. 
-Once your `TF_WORKSPACE` value is set and you have the appropriate +Once your `TEZT_CLOUD` value is set and you have the appropriate rights, you should be able to create the registry using: ```bash @@ -274,7 +274,7 @@ exported to the Prometheus instance using the function where: - `ssh-key` is the one you generated above - - `workspace` is the content of the `$TF_WORKSPACE` variable + - `workspace` is the content of the `$TEZT_CLOUD` variable - `XYZ` is the counter of the VM - `zone` is the zone of the VM (default is `europe-west1-b`) diff --git a/tezt/lib_cloud/cloud.ml b/tezt/lib_cloud/cloud.ml index 5179623ff93456ac0d6620617b43bcdce8898533..de55f860036c517af8d575a66cadbd03aa9b03fd 100644 --- a/tezt/lib_cloud/cloud.ml +++ b/tezt/lib_cloud/cloud.ml @@ -88,44 +88,32 @@ let rec wait_ssh_server_running runner = let register ?vms ~__FILE__ ~title ~tags ?seed f = Test.register ~__FILE__ ~title ~tags ?seed @@ fun () -> - let configuration = + let vms = (* The Cli arguments by-pass the argument given here. This enable the user to always have decide precisely the number of vms to be run. *) match (vms, Cli.vms) with | None, None -> None - | None, Some i | Some _, Some i -> Some i - | Some i, None -> Some i + | None, Some i | Some _, Some i -> + let vms = + List.init i (fun _ -> Deployement.{machine_type = Cli.machine_type}) + in + Some vms + | Some vms, None -> Some vms in - match configuration with + match vms with | None -> (* If there is no configuration, it is a similar scenario as if there were not agent. 
*) f {agents = []; website = None; prometheus = None; deployement = None} - | Some number_of_vms -> + | Some configurations -> let ports_per_vm = Cli.ports_per_vm in let* deployement = Deployement.deploy ~ports_per_vm - ~number_of_vms + ~configurations ~localhost:Cli.localhost () in - let* points = Deployement.get_points deployement in - let names = - List.init number_of_vms (fun i -> Format.asprintf "vm_%d" i) - in - let ssh_id = Lazy.force Env.ssh_private_key in - let agents = - points - |> List.map2 (fun left right -> (left, right)) names - |> List.map (fun (name, point) -> - Agent.make - ~ssh_id - ~point - ~next_available_port:(fun () -> - Deployement.next_port deployement point) - ~name - ()) - in + let agents = Deployement.agents deployement in let* () = agents |> List.map (fun agent -> Agent.runner agent |> wait_ssh_server_running) @@ -145,7 +133,9 @@ let register ?vms ~__FILE__ ~title ~tags ?seed f = address)) ; let* website = if Cli.website then - let* website = Web.start ~port:Cli.website_port ~agents in + let* website = + Web.start ~port:Cli.website_port ~deployement ~agents + in Lwt.return_some website else Lwt.return_none in @@ -166,7 +156,7 @@ let register ?vms ~__FILE__ ~title ~tags ?seed f = (fun exn -> Lwt.return_some exn) in (* This part is tricky! We want to catch Ctrl+C so that tezt does not - kill all the process directly before tezt-cloud termination tasks are + kill all the VMs directly before tezt-cloud termination tasks are over. When the signal is caught, tezt-cloud takes over. Processes are cleaned up manually via [Process.clean_up ()]. 
*) @@ -182,6 +172,15 @@ let register ?vms ~__FILE__ ~title ~tags ?seed f = let agents t = t.agents +type vm_configuration = Deployement.configuration = {machine_type : string} + +let default_vm_configuration = {machine_type = Cli.machine_type} + +let get_configuration t agent = + match t.deployement with + | None -> default_vm_configuration + | Some deployement -> Deployement.get_configuration deployement agent + let set_agent_name t agent name = Agent.set_name agent name ; let* () = diff --git a/tezt/lib_cloud/cloud.mli b/tezt/lib_cloud/cloud.mli index 6b03a7118474e313a7a61e2f71450d42d9e5f42f..8fb84240c2cba58eba3bb45b3d93ef38986971f1 100644 --- a/tezt/lib_cloud/cloud.mli +++ b/tezt/lib_cloud/cloud.mli @@ -5,13 +5,17 @@ (* *) (*****************************************************************************) +type vm_configuration = Deployement.configuration = {machine_type : string} + +val default_vm_configuration : vm_configuration + type t (** [register ?vms] is a wrapper around [Test.register]. It enables to run a test that can use machines deployed onto the cloud. 
*) val register : - ?vms:int -> + ?vms:vm_configuration list -> __FILE__:string -> title:string -> tags:string list -> @@ -21,6 +25,8 @@ val register : val agents : t -> Agent.t list +val get_configuration : t -> Agent.t -> vm_configuration + val push_metric : t -> ?labels:(string * string) list -> name:string -> int -> unit diff --git a/tezt/lib_cloud/deployement.ml b/tezt/lib_cloud/deployement.ml index 91c5edcffbaaf1548f075b0099522389dedfe339..1004dddbc0b38440c2ed9355555af88a84e4ccf1 100644 --- a/tezt/lib_cloud/deployement.ml +++ b/tezt/lib_cloud/deployement.ml @@ -5,19 +5,24 @@ (* *) (*****************************************************************************) +type configuration = {machine_type : string} + module Remote = struct + type workspace_info = {configuration : configuration; number_of_vms : int} + + type point_info = {workspace_name : string; gcp_name : string} + + type address = string + type t = { base_port : int; ports_per_vm : int; - next_port : (string * int, int) Hashtbl.t; - names : (string, string) Hashtbl.t; + agents_info : (address, point_info) Hashtbl.t; + agents : Agent.t list; + workspaces_info : (string, workspace_info) Hashtbl.t; zone : string; } - let get_points base_port = - let* addresses = Terraform.VM.points () in - List.map (fun address -> (address, base_port)) addresses |> Lwt.return - let rec wait_docker_running ~zone ~vm_name = let*? process = Gcloud.compute_ssh @@ -66,12 +71,9 @@ module Remote = struct let* () = Lwt_unix.sleep 2. 
in wait_docker_running ~zone ~vm_name - let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~number_of_vms () = - let workspace = Lazy.force Env.workspace in - let docker_registry = Format.asprintf "%s-docker-registry" workspace in - let machine_type = Cli.machine_type in - let* () = Terraform.Docker_registry.init () in - let* () = Terraform.VM.init () in + let workspace_deploy ?(base_port = 30_000) ?(ports_per_vm = 50) + ~workspace_name ~machine_type ~number_of_vms ~docker_registry () = + let* () = Terraform.VM.Workspace.select workspace_name in let* () = Terraform.VM.deploy ~machine_type @@ -81,16 +83,14 @@ module Remote = struct ~docker_registry in let names = - Seq.ints 1 |> Seq.take number_of_vms - |> Seq.map (fun i -> Format.asprintf "%s-%03d" workspace i) - |> List.of_seq + List.init number_of_vms (fun i -> + Format.asprintf "%s-%03d" workspace_name (i + 1)) in let* zone = Terraform.VM.zone () in let* () = List.map (fun vm_name -> wait_docker_running ~zone ~vm_name) names |> Lwt.join in - let next_port = Hashtbl.create number_of_vms in let* () = let run_command ~vm_name cmd args = Gcloud.compute_ssh ~zone ~vm_name cmd args @@ -102,29 +102,118 @@ module Remote = struct |> Lwt.join else Lwt.return_unit in - let* points = get_points base_port in - List.iter - (fun point -> Hashtbl.replace next_port point (base_port + 1)) - points ; - let* ips = - names - |> Lwt_list.map_p (fun name -> Gcloud.get_ip_address_from_name ~zone name) + let ssh_id = Lazy.force Env.ssh_private_key in + let agent_of_name name = + let* ip = Gcloud.get_ip_address_from_name ~zone name in + let point = (ip, base_port) in + let next_available_port = + let port = ref base_port in + fun () -> + incr port ; + !port + in + Agent.make ~ssh_id ~point ~next_available_port ~name () |> Lwt.return in - let names = List.combine names ips |> List.to_seq |> Hashtbl.of_seq in - Lwt.return {base_port; ports_per_vm; next_port; names; zone} + let* agents = names |> Lwt_list.map_p agent_of_name in + 
Lwt.return agents - let run_command {names; zone; _} ~address cmd args = - let vm_name = Hashtbl.find names address in - Gcloud.compute_ssh ~zone ~vm_name cmd args + let get_configuration agents_info workspaces_info agent = + let address = agent |> Agent.runner |> Option.some |> Runner.address in + let {workspace_name; _} = Hashtbl.find agents_info address in + let {configuration; _} = Hashtbl.find workspaces_info workspace_name in + configuration + + let order_agents agents_info workspaces_info agents configurations = + let bindings = + agents + |> List.map (fun agent -> + let configuration = + get_configuration agents_info workspaces_info agent + in + (configuration, agent)) + in + let rec order configurations bindings = + match configurations with + | [] -> [] + | configuration :: configurations -> + let agent = List.assoc configuration bindings in + let bindings = List.remove_assoc configuration bindings in + agent :: order configurations bindings + in + order configurations bindings + + let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~configurations () = + let tezt_cloud = Lazy.force Env.tezt_cloud in + let docker_registry = Format.asprintf "%s-docker-registry" tezt_cloud in + let workspaces_info = Hashtbl.create 11 in + let agents_info = Hashtbl.create 11 in + let () = + List.to_seq configurations |> Seq.group ( = ) + |> Seq.iteri (fun i seq -> + let configuration = Seq.uncons seq |> Option.get |> fst in + let name = Format.asprintf "%s-%d" tezt_cloud i in + Hashtbl.add + workspaces_info + name + {configuration; number_of_vms = Seq.length seq}) + in + let* () = Terraform.Docker_registry.init () in + let* () = Terraform.VM.init () in + let workspaces_names = + workspaces_info |> Hashtbl.to_seq_keys |> List.of_seq + in + let* () = Terraform.VM.Workspace.init workspaces_names in + let* agents = + workspaces_info |> Hashtbl.to_seq |> List.of_seq + |> Lwt_list.map_s + (fun (workspace_name, {configuration = {machine_type}; number_of_vms}) + -> + let* () = 
Terraform.VM.Workspace.select workspace_name in + let* () = Terraform.VM.init () in + let* agents = + workspace_deploy + ~base_port + ~ports_per_vm + ~workspace_name + ~machine_type + ~number_of_vms + ~docker_registry + () + in + agents + |> List.iter (fun agent -> + (* We index the table per address to identify uniquely the agent. *) + let address = + agent |> Agent.runner |> Option.some |> Runner.address + in + Hashtbl.add + agents_info + address + {workspace_name; gcp_name = Agent.name agent}) ; + Lwt.return agents) + in + let agents = + order_agents + agents_info + workspaces_info + (List.concat agents) + configurations + in + let* zone = Terraform.VM.zone () in + Lwt.return + {base_port; ports_per_vm; zone; agents_info; agents; workspaces_info} - let get_points t = get_points t.base_port + let get_configuration t agent = + get_configuration t.agents_info t.workspaces_info agent - let next_port t point = - let port = Hashtbl.find t.next_port point in - Hashtbl.replace t.next_port point (port + 1) ; - port + let run_vm_command {agents_info; zone; _} agent cmd args = + let address = agent |> Agent.runner |> Option.some |> Runner.address in + let {gcp_name = vm_name; _} = Hashtbl.find agents_info address in + Gcloud.compute_ssh ~zone ~vm_name cmd args - let terminate ?exn _t = + let agents t = t.agents + + let terminate ?exn t = (match exn with | None -> Log.report ~color:Log.Color.FG.green "Scenario ended successfully." @@ -135,7 +224,9 @@ module Remote = struct (Printexc.to_string exn)) ; if Cli.destroy then ( Log.report "Destroying VMs, this may take a while..." ; - Terraform.VM.destroy ()) + let workspaces = Hashtbl.to_seq_keys t.workspaces_info |> List.of_seq in + let* () = Terraform.VM.destroy workspaces in + Terraform.VM.Workspace.destroy ()) else ( Log.report "No VM destroyed! 
Don't forget to destroy them when you are done with \ @@ -149,24 +240,27 @@ module Localhost = struct processes : Process.t list; base_port : int; ports_per_vm : int; - next_port : (string * int, int) Hashtbl.t; names : (string, string) Hashtbl.t; + agents : Agent.t list; } - let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~number_of_vms () = + let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~configurations () = (* We need to intialize the docker registry even on localhost to fetch the docker image. *) let* () = Terraform.Docker_registry.init () in let* docker_registry = Terraform.Docker_registry.get_docker_registry () in - let workspace = Lazy.force Env.workspace in + let tezt_cloud = Lazy.force Env.tezt_cloud in let image_name = - Format.asprintf "%s/%s:%s" docker_registry workspace "latest" + Format.asprintf "%s/%s:%s" docker_registry tezt_cloud "latest" in let names = Hashtbl.create 11 in + (* The current configuration is actually unused in localhost. Only the + number of VMs matters. 
*) + let number_of_vms = List.length configurations in let processes = Seq.ints 0 |> Seq.take number_of_vms |> Seq.map (fun i -> - let name = Format.asprintf "%s-%03d" workspace (i + 1) in + let name = Format.asprintf "%s-%03d" tezt_cloud (i + 1) in let start = base_port + (i * ports_per_vm) |> string_of_int in let stop = base_port + ((i + 1) * ports_per_vm) - 1 |> string_of_int @@ -200,25 +294,42 @@ module Localhost = struct in if Cli.monitoring then Monitoring.run ~run_command else Lwt.return_unit in + let ssh_id = Lazy.force Env.ssh_private_key in + let get_point i = + let port = base_port + (i * ports_per_vm) in + ("localhost", port) + in + let next_port point = + let port = Hashtbl.find next_port point in + Hashtbl.replace next_port point (port + 1) ; + port + in + let agents = + List.init number_of_vms (fun i -> + let name = Format.asprintf "localhost_docker_%d" i in + let point = get_point i in + Agent.make + ~ssh_id + ~point + ~next_available_port:(fun () -> next_port point) + ~name + ()) + in Lwt.return - {number_of_vms; processes; base_port; ports_per_vm; next_port; names} + {number_of_vms; processes; base_port; ports_per_vm; names; agents} - let run_command _t ~address:_ cmd args = + (* Since in [localhost] mode, the VM is the host machine, this comes back + to just run a command on the host machine. *) + let run_vm_command cmd args = let value : Process.t = Process.spawn cmd args in let run process = Process.check_and_read_stdout process in {value; run} - let get_points {number_of_vms; base_port; ports_per_vm; _} = - Seq.ints 0 |> Seq.take number_of_vms - |> Seq.map (fun i -> - let port = base_port + (i * ports_per_vm) in - ("localhost", port)) - |> List.of_seq |> Lwt.return + let agents t = t.agents - let next_port t point = - let port = Hashtbl.find t.next_port point in - Hashtbl.replace t.next_port point (port + 1) ; - port + let get_configuration _t _agent = + (* The configuration is not used in localhost. 
*) + {machine_type = Cli.machine_type} let terminate ?exn _t = (match exn with @@ -244,31 +355,31 @@ end type t = Remote of Remote.t | Localhost of Localhost.t -let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~number_of_vms ~localhost +let deploy ?(base_port = 30_000) ?(ports_per_vm = 50) ~configurations ~localhost () = if localhost then let* localhost = - Localhost.deploy ~base_port ~ports_per_vm ~number_of_vms () + Localhost.deploy ~base_port ~ports_per_vm ~configurations () in Lwt.return (Localhost localhost) else - let* remote = Remote.deploy ~base_port ~ports_per_vm ~number_of_vms () in + let* remote = Remote.deploy ~base_port ~ports_per_vm ~configurations () in Lwt.return (Remote remote) -let run_command t ~address cmd args = +let run_vm_command t agent cmd args = match t with - | Remote remote -> Remote.run_command remote ~address cmd args - | Localhost localhost -> Localhost.run_command localhost ~address cmd args + | Remote remote -> Remote.run_vm_command remote agent cmd args + | Localhost _localhost -> Localhost.run_vm_command cmd args -let get_points t = +let agents t = match t with - | Remote remote -> Remote.get_points remote - | Localhost localhost -> Localhost.get_points localhost + | Remote remote -> Remote.agents remote + | Localhost localhost -> Localhost.agents localhost -let next_port t point = +let get_configuration t = match t with - | Remote remote -> Remote.next_port remote point - | Localhost localhost -> Localhost.next_port localhost point + | Remote remote -> Remote.get_configuration remote + | Localhost localhost -> Localhost.get_configuration localhost let terminate ?exn t = match t with diff --git a/tezt/lib_cloud/deployement.mli b/tezt/lib_cloud/deployement.mli index 87b43b7ef0e21c02506abba2ae8c011750b8172e..2725d90e67b8790aaefac8bf5864ce704f5c7f8b 100644 --- a/tezt/lib_cloud/deployement.mli +++ b/tezt/lib_cloud/deployement.mli @@ -8,6 +8,8 @@ (** Type for resources managed by a deployment. 
*) type t +type configuration = {machine_type : string} + (** [deploy ?base_port ?ports_per_vm ~number_of_vms ~machine_type ()] deploys the expected number of vms. For each vm, we can specify a [base_port] which is the first port to be opened and [ports_per_vm] specify the @@ -17,21 +19,21 @@ type t val deploy : ?base_port:int -> ?ports_per_vm:int -> - number_of_vms:int -> + configurations:configuration list -> localhost:bool -> unit -> t Lwt.t -(** [run_command t ~address cmd args] can run a command on the vm located at - [address]. *) -val run_command : - t -> address:string -> string -> string list -> (Process.t, string) Runnable.t +(** [agents t] returns the list of agents deployed. *) +val agents : t -> Agent.t list -(** [get_points t] returns the points associated to the deployed machines. *) -val get_points : t -> (string * int) list Lwt.t +val get_configuration : t -> Agent.t -> configuration -(** [next_port t] returns the next available port for the given point. *) -val next_port : t -> string * int -> int +(** [run_vm_command t agent cmd args] can run a command on the vm hosting + [agent]. This is different from running a command on the agent directly + since the agent runs on a docker image. *) +val run_vm_command : + t -> Agent.t -> string -> string list -> (Process.t, string) Runnable.t (** [terminate ?exn t] should be called to tear down the machine. 
Do note that this call may or may not destroy the machine depending on diff --git a/tezt/lib_cloud/docker.ml b/tezt/lib_cloud/docker.ml index bd041aa396a13ff228ad7e6ad9ddbaece3f5708a..c7b5e9d57140bc06a4169075b408dfe4d68861db 100644 --- a/tezt/lib_cloud/docker.ml +++ b/tezt/lib_cloud/docker.ml @@ -15,23 +15,25 @@ let build ?(tag = "latest") ?dockerfile ~args () = ["--build-arg"; Format.asprintf "%s=%s" key value]) |> List.concat in - let workspace = Lazy.force Env.workspace in + let tezt_cloud = Lazy.force Env.tezt_cloud in let dockerfile = - Option.value ~default:(Path.docker // workspace // ".Dockerfile") dockerfile + Option.value + ~default:(Path.docker // tezt_cloud // ".Dockerfile") + dockerfile in - let tag = ["-t"; Format.asprintf "%s:%s" workspace tag] in + let tag = ["-t"; Format.asprintf "%s:%s" tezt_cloud tag] in let args = ["build"; "-f"; dockerfile] @ build_args @ tag @ ["."] in let value = Process.spawn ~name ~color "docker" args in let run = Process.check in {value; run} let tag ?(tag = "latest") docker_registry = - let workspace = Lazy.force Env.workspace in + let tezt_cloud = Lazy.force Env.tezt_cloud in let args = [ "tag"; - Format.asprintf "%s:%s" workspace tag; - Format.asprintf "%s/%s:%s" docker_registry workspace tag; + Format.asprintf "%s:%s" tezt_cloud tag; + Format.asprintf "%s/%s:%s" docker_registry tezt_cloud tag; ] in let value = Process.spawn ~name ~color "docker" args in @@ -39,18 +41,18 @@ let tag ?(tag = "latest") docker_registry = {value; run} let push ?(tag = "latest") docker_registry = - let workspace = Lazy.force Env.workspace in + let tezt_cloud = Lazy.force Env.tezt_cloud in let args = - ["push"; Format.asprintf "%s/%s:%s" docker_registry workspace tag] + ["push"; Format.asprintf "%s/%s:%s" docker_registry tezt_cloud tag] in let value = Process.spawn ~name ~color "docker" args in let run = Process.check in {value; run} let pull ?(tag = "latest") docker_registry = - let workspace = Lazy.force Env.workspace in + let tezt_cloud = 
Lazy.force Env.tezt_cloud in let args = - ["pull"; Format.asprintf "%s/%s:%s" docker_registry workspace tag] + ["pull"; Format.asprintf "%s/%s:%s" docker_registry tezt_cloud tag] in let value = Process.spawn ~name ~color "docker" args in let run = Process.check in diff --git a/tezt/lib_cloud/env.ml b/tezt/lib_cloud/env.ml index 29c099ed4d9ec00d3cf2612e23fc4a63a65aeb57..2a40ff64f33076d210859dca381a2d5663d49465 100644 --- a/tezt/lib_cloud/env.ml +++ b/tezt/lib_cloud/env.ml @@ -5,21 +5,21 @@ (* *) (*****************************************************************************) -let workspace = +let tezt_cloud = (* This is a lazy value to be sure that this is evaluated only inside a Tezt test. *) Lazy.from_fun @@ fun () -> - match Sys.getenv_opt "TF_WORKSPACE" with + match Sys.getenv_opt "TEZT_CLOUD" with | None -> Test.fail - "The environment variable 'TF_WORKSPACE' is not defined. See README \ - for more information why this variable must be defined." + "The environment variable 'TEZT_CLOUD' is not defined. See README for \ + more information why this variable must be defined." | Some value -> value let ssh_private_key = Lazy.from_fun (fun () -> let home = Sys.getenv "HOME" in - let workspace = Lazy.force workspace in - home // ".ssh" // Format.asprintf "%s-tf" workspace) + let tezt_cloud = Lazy.force tezt_cloud in + home // ".ssh" // Format.asprintf "%s-tf" tezt_cloud) let ssh_public_key = Lazy.from_fun (fun () -> @@ -28,5 +28,5 @@ let ssh_public_key = let dockerfile = Lazy.from_fun (fun () -> - let workspace = Lazy.force workspace in - Path.docker // Format.asprintf "%s.Dockerfile" workspace) + let tezt_cloud = Lazy.force tezt_cloud in + Path.docker // Format.asprintf "%s.Dockerfile" tezt_cloud) diff --git a/tezt/lib_cloud/env.mli b/tezt/lib_cloud/env.mli index f627f8d67ba580906dc6f319c4ec67db71bda11e..689ab748ddca0291efc51f328fa4f04fa8929e94 100644 --- a/tezt/lib_cloud/env.mli +++ b/tezt/lib_cloud/env.mli @@ -9,8 +9,8 @@ variables. 
Those values are lazy so that they are evaluated within a test and fail if one environment variable is missing. *) -(** Value of the environment variable [TF_WORKSPACE]. *) -val workspace : string Lazy.t +(** Value of the environment variable [TEZT_CLOUD]. *) +val tezt_cloud : string Lazy.t (** Path to the ssh private key that will be used with the docker image. Depends on [workspace]. *) diff --git a/tezt/lib_cloud/jobs.ml b/tezt/lib_cloud/jobs.ml index 53bc8e3a76420b0f3cdfc930ddb763642df30491..d127b893d79670fdb53be1e342fe6b20cd5fea58 100644 --- a/tezt/lib_cloud/jobs.ml +++ b/tezt/lib_cloud/jobs.ml @@ -16,8 +16,8 @@ let docker_push ~tags = ~title:"Push the dockerfile to the registry" ~tags:("docker" :: "push" :: tags) @@ fun () -> - let workspace = Lazy.force Env.workspace in - Log.info "Workspace found with value: %s" workspace ; + let tezt_cloud = Lazy.force Env.tezt_cloud in + Log.info "TEZT_CLOUD variable found with value: %s" tezt_cloud ; let ssh_public_key = Lazy.force Env.ssh_public_key in Log.info "Checking the existence of ssh public key '%s'..." ssh_public_key ; let* ssh_public_key = @@ -31,7 +31,7 @@ let docker_push ~tags = in Log.info "Checking the existence of the docker file %s.Dockerfile..." - workspace ; + tezt_cloud ; let dockerfile = Lazy.force Env.dockerfile in if not (Sys.file_exists dockerfile) then Test.fail @@ -59,7 +59,7 @@ let docker_push ~tags = ("BINARIES_DESTINATION_PATH", Agent.default_binaries_path ()); ] in - Log.info "Building image from %s.Dockerfile..." workspace ; + Log.info "Building image from %s.Dockerfile..." tezt_cloud ; let*! () = Docker.build ~dockerfile ~args () in Log.info "Tagging the image..." ; let*! 
() = Docker.tag docker_registry in @@ -73,8 +73,8 @@ let deploy_docker_registry ~tags = ~title:"Deploy docker registry" ~tags:("docker" :: "registry" :: "deploy" :: tags) @@ fun () -> - let workspace = Lazy.force Env.workspace in - Log.info "Workspace found with value: %s" workspace ; + let tezt_cloud = Lazy.force Env.tezt_cloud in + Log.info "Tezt_Cloud found with value: %s" tezt_cloud ; let* () = Terraform.Docker_registry.init () in Terraform.Docker_registry.deploy () @@ -84,8 +84,8 @@ let deploy_terraform_state_bucket ~tags = ~title:"Deploy terraform state bucket" ~tags:("terraform" :: "state" :: "bucket" :: "deploy" :: tags) @@ fun () -> - let workspace = Lazy.force Env.workspace in - Log.info "Workspace found with value: %s" workspace ; + let tezt_cloud = Lazy.force Env.tezt_cloud in + Log.info "Tezt_Cloud found with value: %s" tezt_cloud ; let* () = Terraform.State_bucket.init () in Terraform.State_bucket.deploy () @@ -95,9 +95,12 @@ let destroy_vms ~tags = ~title:"Destroy terraform VMs" ~tags:("terraform" :: "destroy" :: tags) @@ fun () -> - let workspace = Lazy.force Env.workspace in - Log.info "Workspace found with value: %s" workspace ; - Terraform.VM.destroy () + let tezt_cloud = Lazy.force Env.tezt_cloud in + Log.info "Tezt_Cloud found with value: %s" tezt_cloud ; + let* () = Terraform.VM.Workspace.select "default" in + let* workspaces = Terraform.VM.Workspace.get () in + let* () = Terraform.VM.destroy workspaces in + Terraform.VM.Workspace.destroy () let prometheus_import ~tags = Test.register @@ -125,64 +128,84 @@ let clean_up_vms ~tags = ~title:"Clean ups VMs manually" ~tags:("clean" :: "up" :: tags) @@ fun () -> - let* () = Terraform.VM.init () in - let* points = Terraform.VM.points () in - let n = List.length points in - let workspace = Lazy.force Env.workspace in - let names = - Seq.ints 1 |> Seq.take n - |> Seq.map (fun i -> Format.asprintf "%s-%03d" workspace i) - |> List.of_seq - in - let* zone = Terraform.VM.zone () in - (* We restart the main 
docker image and kill/remove all the other ones. *) - let* () = - names - |> Lwt_list.iter_p (fun vm_name -> - let*! output = - Gcloud.compute_ssh - ~zone - ~vm_name - "docker" - ["ps"; "--format"; "{{.Names}}"] - in - let images_name = - String.split_on_char '\n' output - |> List.filter (fun str -> str <> "") - in - let main_image, other_images = - List.partition (fun str -> str <> "netdata") images_name - in - if List.length main_image <> 1 then - Test.fail - "Unexpected setting. All the docker images found: %s. There \ - should only be one image which is not 'netdata' in this list" - (String.concat ";" images_name) ; - let main_image = List.hd main_image in - let*! _ = - Gcloud.compute_ssh ~zone ~vm_name "docker" ["stop"; main_image] - in - let*! _ = - Gcloud.compute_ssh ~zone ~vm_name "docker" ["start"; main_image] - in - let* () = - other_images - |> Lwt_list.iter_p (fun image -> - let*! _ = - Gcloud.compute_ssh ~zone ~vm_name "docker" ["kill"; image] - in - let*! _ = - Gcloud.compute_ssh ~zone ~vm_name "docker" ["rm"; image] - in - Lwt.return_unit) - in - Lwt.return_unit) - in - unit + let* workspaces = Terraform.VM.Workspace.get () in + workspaces + |> Lwt_list.iter_s (fun workspace -> + let* () = Terraform.VM.Workspace.select workspace in + let* () = Terraform.VM.init () in + let* points = Terraform.VM.points () in + let n = List.length points in + let names = + Seq.ints 1 |> Seq.take n + |> Seq.map (fun i -> Format.asprintf "%s-%03d" workspace i) + |> List.of_seq + in + let* zone = Terraform.VM.zone () in + (* We restart the main docker image and kill/remove all the other ones. *) + let* () = + names + |> Lwt_list.iter_p (fun vm_name -> + let*! 
output = + Gcloud.compute_ssh + ~zone + ~vm_name + "docker" + ["ps"; "--format"; "{{.Names}}"] + in + let images_name = + String.split_on_char '\n' output + |> List.filter (fun str -> str <> "") + in + let main_image, other_images = + List.partition (fun str -> str <> "netdata") images_name + in + if List.length main_image <> 1 then + Test.fail + "Unexpected setting. All the docker images found: %s. \ + There should only be one image which is not 'netdata' \ + in this list" + (String.concat ";" images_name) ; + let main_image = List.hd main_image in + let*! _ = + Gcloud.compute_ssh + ~zone + ~vm_name + "docker" + ["stop"; main_image] + in + let*! _ = + Gcloud.compute_ssh + ~zone + ~vm_name + "docker" + ["start"; main_image] + in + let* () = + other_images + |> Lwt_list.iter_p (fun image -> + let*! _ = + Gcloud.compute_ssh + ~zone + ~vm_name + "docker" + ["kill"; image] + in + let*! _ = + Gcloud.compute_ssh + ~zone + ~vm_name + "docker" + ["rm"; image] + in + Lwt.return_unit) + in + Lwt.return_unit) + in + unit) let simple ~tags = Cloud.register - ~vms:2 + ~vms:[{machine_type = Cli.machine_type}; {machine_type = Cli.machine_type}] ~__FILE__ ~tags:("simple" :: "health" :: tags) ~title:"Simple health check to check local configuration" diff --git a/tezt/lib_cloud/terraform.ml b/tezt/lib_cloud/terraform.ml index a7f1dbcba6d84f07e8401d06c204c1ec5054202e..6a575b2ebb6ec7d3e4108428d40a4c212f9b4f4b 100644 --- a/tezt/lib_cloud/terraform.ml +++ b/tezt/lib_cloud/terraform.ml @@ -23,7 +23,9 @@ module Docker_registry = struct let deploy () = let* project_id = Gcloud.project_id () in + let tezt_cloud = Lazy.force Env.tezt_cloud in Process.run + ~env:(String_map.singleton "TF_WORKSPACE" tezt_cloud) ~name ~color "terraform" @@ -36,8 +38,10 @@ module Docker_registry = struct ]) let get_docker_registry () = + let tezt_cloud = Lazy.force Env.tezt_cloud in let* output = Process.run_and_read_stdout + ~env:(String_map.singleton "TF_WORKSPACE" tezt_cloud) ~name ~color "terraform" @@ -50,8 
+54,10 @@ module Docker_registry = struct Lwt.return registry_name let get_hostname () = + let tezt_cloud = Lazy.force Env.tezt_cloud in let* output = Process.run_and_read_stdout + ~env:(String_map.singleton "TF_WORKSPACE" tezt_cloud) ~name ~color "terraform" @@ -63,24 +69,80 @@ module Docker_registry = struct end module VM = struct - let init () = - (* If this is the first time the workspace is used, it needs to be created. - For all the other cases, terraform will fail, hence we forget the error - (yeah, this is a bit ugly). This must be run before `terraform init`. *) - let workspace = Lazy.force Env.workspace in - let* _ = - Process.spawn + (* A VM is deployed under a workspace. A single tezt cloud environment can use + multiple workspaces all prefixed by the current tezt cloud environment. *) + module Workspace = struct + let select workspace = + Process.run ~name ~color "terraform" - (chdir Path.terraform_vm @ ["workspace"; "new"; workspace]) - |> Process.wait - in + (chdir Path.terraform_vm @ ["workspace"; "select"; workspace]) + + (* Return all the workspaces associated with the current tezt cloud + environment. *) + let get () = + let tezt_cloud = Lazy.force Env.tezt_cloud in + (* We select the default workspace to be sure we can parse correctly the + output. *) + let* () = select "default" in + let* output = + Process.run_and_read_stdout + ~name + ~color + "terraform" + (chdir Path.terraform_vm @ ["workspace"; "list"]) + in + String.split_on_char '\n' output + |> List.map String.trim + |> List.filter (fun workspace -> + String.starts_with ~prefix:tezt_cloud workspace) + |> Lwt.return + + (* Create workspaces that will be used for the experiment. Delete the ones + that won't be used. 
*) + let init workspaces = + let* existing_workspaces = get () in + let to_create = + List.filter + (fun workspace -> not @@ List.mem workspace existing_workspaces) + workspaces + in + let* () = + to_create + |> List.map (fun workspace -> + Process.run + ~name + ~color + "terraform" + (chdir Path.terraform_vm @ ["workspace"; "new"; workspace])) + |> Lwt.join + in + (* We want to ensure the last workspace created will not be the + one selected by default. Instead it should be set when + deploying the machines. *) + let* () = select "default" in + unit + + let destroy () = + let* workspaces = get () in + workspaces + |> List.map (fun workspace -> + Process.run + ~name + ~color + "terraform" + (chdir Path.terraform_vm @ ["workspace"; "delete"; workspace])) + |> Lwt.join + end + + let init () = Process.run ~name ~color "terraform" (chdir Path.terraform_vm @ ["init"]) let deploy ~machine_type ~base_port ~ports_per_vm ~number_of_vms ~docker_registry = let* project_id = Gcloud.project_id () in + let docker_image_name = Lazy.force Env.tezt_cloud in let args = [ "--var"; @@ -95,6 +157,8 @@ module VM = struct Format.asprintf "project_id=%s" project_id; "--var"; Format.asprintf "machine_type=%s" machine_type; + "--var"; + Format.asprintf "docker_image_name=%s" docker_image_name; ] in Process.run @@ -143,22 +207,28 @@ module VM = struct in Lwt.return machine_type - let destroy () = + let destroy workspaces = let* project_id = Gcloud.project_id () in let* machine_type = machine_type () in - Process.run - ~name - ~color - "terraform" - (chdir Path.terraform_vm - @ [ - "destroy"; - "--auto-approve"; - "--var"; - Format.asprintf "project_id=%s" project_id; - "--var"; - Format.asprintf "machine_type=%s" machine_type; - ]) + let docker_image_name = Lazy.force Env.tezt_cloud in + workspaces + |> Lwt_list.iter_s (fun workspace -> + let* () = Workspace.select workspace in + Process.run + ~name + ~color + "terraform" + (chdir Path.terraform_vm + @ [ + "destroy"; + "--auto-approve"; + 
"--var"; + Format.asprintf "project_id=%s" project_id; + "--var"; + Format.asprintf "machine_type=%s" machine_type; + "--var"; + Format.asprintf "docker_image_name=%s" docker_image_name; + ])) end module State_bucket = struct @@ -171,8 +241,10 @@ module State_bucket = struct (chdir Path.terraform_state_bucket @ ["init"]) let deploy () = + let tezt_cloud = Lazy.force Env.tezt_cloud in let* project_id = Gcloud.project_id () in Process.run + ~env:(String_map.singleton "TF_WORKSPACE" tezt_cloud) ~name ~color "terraform" diff --git a/tezt/lib_cloud/terraform.mli b/tezt/lib_cloud/terraform.mli index bb375581f7e882598c9a4bd61141a2fd4ac4a15f..a6e4d67da486ddd95477b24160bc5c7f89f58cae 100644 --- a/tezt/lib_cloud/terraform.mli +++ b/tezt/lib_cloud/terraform.mli @@ -16,6 +16,16 @@ module Docker_registry : sig end module VM : sig + module Workspace : sig + val init : string list -> unit Lwt.t + + val get : unit -> string list Lwt.t + + val select : string -> unit Lwt.t + + val destroy : unit -> unit Lwt.t + end + val init : unit -> unit Lwt.t val deploy : @@ -30,7 +40,7 @@ module VM : sig val zone : unit -> string Lwt.t - val destroy : unit -> unit Lwt.t + val destroy : string list -> unit Lwt.t end module State_bucket : sig diff --git a/tezt/lib_cloud/terraform/vm/main.tf b/tezt/lib_cloud/terraform/vm/main.tf index 74cb6efdf591478e4597289fd0a0190c422986ac..97ff8f9da6fef550933e67fbfe518b2c2d4f1d35 100644 --- a/tezt/lib_cloud/terraform/vm/main.tf +++ b/tezt/lib_cloud/terraform/vm/main.tf @@ -28,6 +28,11 @@ variable "docker_registry_name" { default = "docker-registry" } +variable "docker_image_name" { + type = string + description = "The docker image name" +} + variable "base_port" { type = number description = "First open port by the firewall" @@ -75,7 +80,7 @@ provider "google" { # A service account must be associated with a VM resource "google_service_account" "default" { - account_id = "${terraform.workspace}-service-account-id" + account_id = "${terraform.workspace}-id" 
display_name = "${terraform.workspace} service Account" } @@ -93,7 +98,7 @@ module "gce-container" { source = "terraform-google-modules/container-vm/google" version = "~> 3.0" - container = { image = "${local.artifact_registry}/${var.project_id}/${var.docker_registry_name}/${terraform.workspace}" } + container = { image = "${local.artifact_registry}/${var.project_id}/${var.docker_registry_name}/${var.docker_image_name}" } } # When running a VM, it must be associated with a Virtual Private @@ -187,6 +192,8 @@ module "instance_template" { machine_type = var.machine_type + disk_type = "pd-ssd" + region = var.region } @@ -226,5 +233,6 @@ output "zone" { output "machine_type" { description = "Machine type" - value = var.machine_type + # All the instances have the same machine type + value = module.umig.instances_details[0].machine_type } diff --git a/tezt/lib_cloud/tezt_cloud.mli b/tezt/lib_cloud/tezt_cloud.mli index 41e819d9a654bf0cdc1f8a0b0e4aff901b47809f..a6662bf19e629f348b2d6308a4238cfacaabb163 100644 --- a/tezt/lib_cloud/tezt_cloud.mli +++ b/tezt/lib_cloud/tezt_cloud.mli @@ -8,11 +8,15 @@ module Cloud : sig type t + type vm_configuration = {machine_type : string} + + val default_vm_configuration : vm_configuration + (** A wrapper around [Test.register] that can be used to register new tests using VMs provided as a map indexed by name. Each VM is abstracted via the [Agent] module. *) val register : - ?vms:int -> + ?vms:vm_configuration list -> __FILE__:string -> title:string -> tags:string list -> @@ -42,6 +46,8 @@ module Cloud : sig points to scrap. Each point can have a name defined by [app_name]. 
*) val add_prometheus_source : t -> ?metric_path:string -> job_name:string -> target list -> unit Lwt.t + + val get_configuration : t -> Agent.t -> vm_configuration end (** [register ~tags] register a set of jobs that can be used for setting diff --git a/tezt/lib_cloud/web.ml b/tezt/lib_cloud/web.ml index 847ff482a0338e81a9d68387886a795cebdad720..bf6ac60058004dc274982e3c60aa92462bc4109b 100644 --- a/tezt/lib_cloud/web.ml +++ b/tezt/lib_cloud/web.ml @@ -5,7 +5,22 @@ (* *) (*****************************************************************************) -type t = {process : Process.t; dir : string} +type t = {process : Process.t; dir : string; deployement : Deployement.t} + +let configuration ~deployement ~agents = + let str = + agents + |> List.map (fun agent -> + let configuration = + Deployement.get_configuration deployement agent + in + Format.asprintf + "- %s: %s" + (Agent.name agent) + configuration.machine_type) + |> String.concat "\n" + in + Format.asprintf "# Configurations@.%s\n" str let monitoring ~agents = if Cli.monitoring then @@ -30,15 +45,16 @@ let prometheus () = "# Prometheus\n [Prometheus dashboard](http://localhost:9090)" else "Prometheus disabled. Use `--prometheus` to activate it." -let markdown_content ~agents = - [monitoring ~agents; prometheus ()] |> String.concat "\n" +let markdown_content ~deployement ~agents = + [configuration ~deployement ~agents; monitoring ~agents; prometheus ()] + |> String.concat "\n" let index dir = dir // "index.md" let write t ~agents = (* The content is formatted in markdown and will be rendered in html via pandoc. 
*) - let content = markdown_content ~agents in + let content = markdown_content ~deployement:t.deployement ~agents in let dir = t.dir in let index = index dir in Base.with_open_out index (fun oc -> output_string oc content) ; @@ -56,7 +72,7 @@ let write t ~agents = "-s"; ] -let run ~port = +let run ~port ~deployement = let dir = Temp.dir "website" in let index = index dir in let process = @@ -70,10 +86,10 @@ let run ~port = Filename.dirname index; ] in - Lwt.return {process; dir} + Lwt.return {process; dir; deployement} -let start ~port ~agents = - let* t = run ~port in +let start ~port ~deployement ~agents = + let* t = run ~port ~deployement in let* () = write t ~agents in Lwt.return t diff --git a/tezt/lib_cloud/web.mli b/tezt/lib_cloud/web.mli index b51bcbd44f2f70665b343f3a8d0d5777cf31e9c6..a02967ac1724cc963a68222241eaec21f424ddbf 100644 --- a/tezt/lib_cloud/web.mli +++ b/tezt/lib_cloud/web.mli @@ -9,7 +9,8 @@ type t (** [start ~port ~agents] starts a webpage with experimentations information if [Cli.website] is [true]. *) -val start : port:int -> agents:Agent.t List.t -> t Lwt.t +val start : + port:int -> deployement:Deployement.t -> agents:Agent.t List.t -> t Lwt.t (** [shutdown website] shutdowns the website. *) val shutdown : t -> unit Lwt.t diff --git a/tezt/tests/cloud/dal.ml b/tezt/tests/cloud/dal.ml index 1be283cf0764a24d643e8523c594aa1776a64bfd..43d91bf34f1d01b28cfd9d052f242840cbcb75f5 100644 --- a/tezt/tests/cloud/dal.ml +++ b/tezt/tests/cloud/dal.ml @@ -37,6 +37,25 @@ module Cli = struct stake_typ [100] + let stake_machine_type = + let stake_machine_type_typ = + let parse string = + try string |> String.split_on_char ',' |> Option.some with _ -> None + in + let show l = l |> String.concat "," in + Clap.typ ~name:"stake_machine_type" ~dummy:["foo"] ~parse ~show + in + Clap.optional + ~section + ~long:"stake-machine-type" + ~placeholder:"<machine_type>,<machine_type>,<machine_type>, ..." + ~description: + "Specify the machine type used by the stake. 
The nth machine type will \ + be assigned to the nth stake specified with [--stake]. If less \ + machine types are specified, the default one will be used." + stake_machine_type_typ + () + let producers = Clap.default_int ~section @@ -44,6 +63,13 @@ module Cli = struct ~description:"Specify the number of DAL producers for this test" 1 + let producer_machine_type = + Clap.optional_string + ~section + ~long:"producer-machine-type" + ~description:"Machine type used for the DAL producers" + () + let protocol = let protocol_typ = let parse string = @@ -76,9 +102,11 @@ end type configuration = { stake : int list; + stake_machine_type : string list option; dal_node_producer : int; protocol : Protocol.t; producer_spreading_factor : int; + producer_machine_type : string option; } type bootstrap = {node : Node.t; dal_node : Dal_node.t; client : Client.t} @@ -476,7 +504,6 @@ let get_infos_per_level client ~level = in let attestations = consensus_operations |> List.to_seq - |> Seq.filter is_dal_attestation |> Seq.map (fun operation -> let public_key_hash = get_public_key_hash operation in let dal_attestation = @@ -690,20 +717,20 @@ let init_producer cloud ~bootstrap_node ~dal_bootstrap_node ~number_of_slots let is_ready = Dal_node.run ~event_level:`Notice dal_node in Lwt.return {client; node; dal_node; account; is_ready} -let init ~configuration cloud next_agent = +let init ~(configuration : configuration) cloud next_agent = let* bootstrap_agent = next_agent ~name:"bootstrap" in - let* producers_agents = - List.init configuration.dal_node_producer (fun i -> - let name = Format.asprintf "producer-%d" i in - next_agent ~name) - |> Lwt.all - in let* attesters_agents = List.init (List.length configuration.stake) (fun i -> let name = Format.asprintf "attester-%d" i in next_agent ~name) |> Lwt.all in + let* producers_agents = + List.init configuration.dal_node_producer (fun i -> + let name = Format.asprintf "producer-%d" i in + next_agent ~name) + |> Lwt.all + in let* bootstrap, 
baker_accounts, producer_accounts = init_bootstrap cloud configuration bootstrap_agent in @@ -822,15 +849,39 @@ let rec loop t level = let configuration = let stake = Cli.stake in + let stake_machine_type = Cli.stake_machine_type in let dal_node_producer = Cli.producers in let protocol = Cli.protocol in let producer_spreading_factor = Cli.producer_spreading_factor in - {stake; dal_node_producer; protocol; producer_spreading_factor} + let producer_machine_type = Cli.producer_machine_type in + { + stake; + stake_machine_type; + dal_node_producer; + protocol; + producer_spreading_factor; + producer_machine_type; + } let benchmark () = let vms = 1 + List.length configuration.stake + configuration.dal_node_producer in + let vms = + List.init vms (fun i -> + (* Bootstrap agent *) + if i = 0 then Cloud.default_vm_configuration + else if i < List.length configuration.stake + 1 then + match configuration.stake_machine_type with + | None -> Cloud.default_vm_configuration + | Some list -> ( + try {machine_type = List.nth list (i - 1)} + with _ -> Cloud.default_vm_configuration) + else + match configuration.producer_machine_type with + | None -> Cloud.default_vm_configuration + | Some machine_type -> {machine_type}) + in Cloud.register ~vms ~__FILE__ @@ -846,7 +897,7 @@ let benchmark () = explicitely a reduced number of agents and it is not clear how to give them proper names. *) let set_name agent name = - if List.length agents = vms then + if List.length agents = List.length vms then Cloud.set_agent_name cloud agent name else Lwt.return_unit in