diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 31dfcfff4645fe8195be00fafbfb813b2497a5eb..9220ab2fc7e8d572ceb32b05ae69f2ec99a8712f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,7 +16,6 @@ workflow: - if: '$CI_COMMIT_BRANCH =~ $REGEX_RELEASE_PREPARE_BRANCH_PREFIX' - if: '$CI_COMMIT_BRANCH =~ $REGEX_RELEASE_BRANCH_PREFIX' - if: $CI_MERGE_REQUEST_EVENT_TYPE - - if: $CI_PIPELINE_SOURCE == "schedule" stages: - lint diff --git a/ci/config/sonobuoy-config.toml b/ci/config/sonobuoy-config.toml new file mode 100644 index 0000000000000000000000000000000000000000..e51e28dc3477b134aca8155541e831ebf63d3aa6 --- /dev/null +++ b/ci/config/sonobuoy-config.toml @@ -0,0 +1,241 @@ +[terraform] +# Huh, no subnet_cidr? Yep, that variable is now set by the gitlab runners. +# For testing IPsec in parallel (you know that the CI can spawn > 1 clusters in parallel, don't you?) +# we have to have different OpenStack subnet addresses. Otherwise our poor remote IPsec endpoint +# would be confused because two peers would contact it with the same traffic selector and hence the +# test ping wouldn't go through reliably. If you want to look up the CIDR, log into the runner nodes and +# check out the `environment` variable in the individual `[[runners]]` sections. 
+ +haproxy_ports = [30060] +workers = 2 +master = 2 + +worker_flavors = [ + "L", # cpu-0 + "L", # cpu-1 +] +worker_names = [ + "cpu-0", + "cpu-1" +] +master_images = ["Ubuntu 20.04 LTS x64", "Ubuntu 22.04 LTS x64"] +worker_images = ["Ubuntu 20.04 LTS x64", "Ubuntu 22.04 LTS x64"] +enable_az_management = false +cluster_name = "ci" + +dualstack_support = false +subnet_v6_cidr = "fd00::/120" + +[load-balancing] +deprecated_nodeport_lb_test_port = 30060 +lb_ports = ["{{ deprecated_nodeport_lb_test_port }}"] + +[wireguard] + +[[wireguard.endpoints]] +id = 0 +enabled = true +ip_cidr = "172.30.153.0/24" +ip_gw = "172.30.153.1/24" +ipv6_cidr = "fd01::/120" +ipv6_gw = "fd01::1/120" +port = 7777 + +[[wireguard.endpoints]] +id = 1 +enabled = true +ip_cidr = "172.30.152.0/24" +ip_gw = "172.30.152.1/24" +port = 7778 + +[[wireguard.peers]] +pub_key = "MQL6dL0DSOnXTLrScCseY7Fs8S5Hb4yHc6SZ+/ucNx0=" +ip = "172.30.153.14/32" +ipv6 = "fd01::14/128" +ident = "gitlab-ci-runner" + +[ch-k8s-lbaas] +enabled = true +shared_secret = "IYeOlEFO1h3uc9x1bdw9thNNgmn1gm8dmzos3f04PLmFjt3d" +version = "0.7.0" +agent_port = 15203 + +[kubernetes] +version = "1.27.10" +is_gpu_cluster = false +virtualize_gpu = false + +[kubernetes.kubelet] +evictionsoft_memory_period = "1m25s" +evictionhard_nodefs_available = "12%" +evictionhard_nodefs_inodesfree = "7%" + +[kubernetes.apiserver] +frontend_port = 8888 + +[kubernetes.storage] +rook_enabled = false +nodeplugin_toleration = true + +[kubernetes.local_storage.static] +enabled = false +storageclass_name = "local-storage-static" + +[kubernetes.local_storage.dynamic] +enabled = false +storageclass_name = "local-storage-dynamic" + +[kubernetes.monitoring] +enabled = false + +[kubernetes.global_monitoring] +enabled = false +nodeport = 31911 +nodeport_name = "ch-k8s-global-monitoring" + +[kubernetes.network] +plugin = "calico" +pod_subnet = "10.244.0.0/16" +service_subnet = "10.96.0.0/12" + +[k8s-service-layer.rook] +namespace = "rook-ceph" +cluster_name = 
"rook-ceph" +use_helm = true +skip_upgrade_checks = true +nodeplugin_toleration = true +nmons = 3 +nmgrs = 2 +nosds = 3 +osd_volume_size = "90Gi" +encrypt_osds = true +toolbox = true +ceph_fs = true +mon_volume = true +mon_volume_storage_class = "local-storage-static" +csi_plugins = true + +use_host_networking = true + +dashboard = true + +mds_memory_limit = "4Gi" +mds_memory_request = "{{ rook_mds_memory_limit }}" +mds_cpu_limit = "1" +mds_cpu_request = "{{ rook_mds_cpu_limit }}" + +mon_cpu_limit = "500m" +mon_cpu_request = "100m" +mon_memory_limit = "1Gi" +mon_memory_request = "500Mi" + +operator_cpu_limit = "500m" +operator_cpu_request = "100m" + +scheduling_key = "{{ scheduling_key_prefix }}/storage" +mgr_scheduling_key = "{{ scheduling_key_prefix }}/rook-mgr" + +[[k8s-service-layer.rook.pools]] +name = "data" +create_storage_class = true +replicated = 1 + +[[k8s-service-layer.rook.pools]] +name = "test-create-storage-class-false" +create_storage_class = false +replicated = 1 + +[[k8s-service-layer.rook.pools]] +name = "test-create-storage-class-undefined" +replicated = 1 + +[k8s-service-layer.prometheus] +use_thanos = false +use_grafana = false +grafana_persistent_storage_class = "rook-ceph-cephfs" +prometheus_persistent_storage_class = "csi-sc-cinderplugin" +thanos_objectstorage_container_name = "ci-monitoring-thanos-data" +scheduling_key = "{{ scheduling_key_prefix }}/monitoring" +grafana_memory_limit = "768Mi" +grafana_memory_request = "768Mi" +grafana_cpu_limit = "600m" +grafana_cpu_request = "200m" + +thanos_store_in_memory_max_size = "1GB" + +internet_probe = true +[[k8s-service-layer.prometheus.internet_probe_targets]] +name = "yaook" # Human readable URL that will appear in Prometheus / AlertManager +url = "https://yaook.cloud/" # The URL that blackbox will scrape +interval = "60s" # Scraping interval. Overrides value set in `defaults` +scrapeTimeout = "60s" # Scrape timeout. 
Overrides value set in `defaults` +[[k8s-service-layer.prometheus.internet_probe_targets]] +name = "quad-9" # Human readable URL that will appear in Prometheus / AlertManager +url = "9.9.9.9" # The URL that blackbox will scrape +module = "icmp" + + +[k8s-service-layer.prometheus.common_labels] +managed-by = "yaook-k8s" + +[k8s-service-layer.cert-manager] +enabled = true + +[k8s-service-layer.ingress] +enabled = true + +[k8s-service-layer.vault] +enabled = false +# Setting `ingress=true` only works and makes sense if we include the letsencrypt certificate creation. +# We don't have a fixed public IP for the CI (as an Ingress IP), nor do we have DNSaaS, hence we can't do this. +ingress = false +# Backups require access to an S3 bucket. If one feels fancy, the test can be extended later and could look like this: +# 1.) inject credentials to an S3 bucket securely (!) into the CI job +# 2.) enable backups +# 3.) hop into the vault-backup pod and execute `backup-now` +# 4.) check if the metric `yaook_backup_shifter_warm_backups_total` increased +enable_backups = false + +[k8s-service-layer.fluxcd] +enabled = true + +[testing] +nodes = ["ci-worker-cpu-0", "ci-worker-cpu-1"] +force_reboot_nodes = true + +[node-scheduling] +scheduling_key_prefix = "scheduling.mk8s.cloudandheat.com" + +[node-scheduling.labels] +ci-worker-storage-0 = ["{{ scheduling_key_prefix }}/storage=true"] +ci-worker-cpu-0 = ["{{ scheduling_key_prefix }}/monitoring=true","{{ scheduling_key_prefix }}/rook-mgr=true"] + + +[ipsec] +enabled = true +test_enabled = true +# To test IPsec we assume presence of a remote endpoint with IPsec up and running. +# We set up an endpoint on a private VM (outside CI) +# using https://gitlab.com/yaook/incubator/k8s-ipsec-endpoint +# We added its IP network information here to initialize an IPsec tunnel in CI. + +# In case you have no access to the endpoint (and no one remains who can), +# feel free to create a new one and update the IP. 
+ +proposals = ["aes256-sha256-modp2048"] +peer_networks = ["172.20.150.0/24"] +remote_addrs = ["185.128.117.230"] +remote_name = "185.128.117.230" +remote_private_addrs = "172.20.150.154" + +[miscellaneous] +wireguard_on_workers = false + +custom_chrony_configuration = true +custom_ntp_servers = [ "0.de.pool.ntp.org", "1.de.pool.ntp.org", "2.de.pool.ntp.org", "3.de.pool.ntp.org"] + +[vault] +cluster_name = "k8s.ci.yaook.cloud" +policy_prefix = "yaook" +path_prefix = "yaook" +nodes_approle = "yaook/nodes" diff --git a/ci/diagnostic-tools.yaml b/ci/diagnostic-tools.yaml index 992b5d9242337936d48249e94e0b71e44c4d87cd..c4579977f20a6dd54aac8417a5ac8dbabec01c3d 100644 --- a/ci/diagnostic-tools.yaml +++ b/ci/diagnostic-tools.yaml @@ -1,18 +1,33 @@ -# this file should be included into '.gitlab-ci.yml' - -.test_with_sonobuoy: +# Spawn a cluster and run sonobuoy end-to-end tests in conformance mode +sonobuoy-tests: + environment: + name: spawn-cluster image: "registry.gitlab.com/yaook/images/k8s-ci/f1a:devel" + stage: diagnostic-tools + variables: + KUBECONFIG: etc/admin.conf + # the variables defined in .test_upgrade_path_k8s_cluster get overwritten, + # so we need to reference the default vars here (only one level of nesting is allowed) + WG_COMPANY_USERS: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, WG_COMPANY_USERS] + K8S_CUSTOM_STAGE_USAGE: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, K8S_CUSTOM_STAGE_USAGE] + ANSIBLE_ANY_ERRORS_FATAL: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ANSIBLE_ANY_ERRORS_FATAL] + ANSIBLE_FORCE_COLOR: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ANSIBLE_FORCE_COLOR] + MANAGED_K8S_LATEST_RELEASE: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, MANAGED_K8S_LATEST_RELEASE] + ci_vault_config_hcl: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_vault_config_hcl] + ci_cert_vaultchain_crt: !reference 
[.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vaultchain_crt] + ci_cert_vault_key: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vault_key] + ci_cert_vaultca_crt: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vaultca_crt] services: !reference [.prepare_spawn_test_cleanup_k8s_cluster, services] before_script: - !reference [.prepare_spawn_test_cleanup_k8s_cluster, before_script] + - rm config/config.toml + - mv config/sonobuoy-config.toml config/config.toml script: + - ls -a + - cat config/config.toml # Spawn and test cluster - !reference [.prepare_spawn_test_cleanup_k8s_cluster, script] - - kubectl taint node ci-worker-gpu-0 k8s.yaook.cloud/gpu-node=true:NoSchedule- - - kubectl taint node ci-worker-storage-0 scheduling.mk8s.cloudandheat.com/storage=true:NoSchedule- - - kubectl taint node ci-worker-storage-1 scheduling.mk8s.cloudandheat.com/storage=true:NoSchedule- - - kubectl taint node ci-worker-storage-2 scheduling.mk8s.cloudandheat.com/storage=true:NoSchedule- - # Run sonobuoy in conformance mode and wait until tests are finished + # Run sonobuoy in conformance mode and wait until tests are finished - sonobuoy run --mode=certified-conformance --wait # Save artifacts - mkdir -p diagnostics/sonobuoy/ @@ -31,12 +46,7 @@ when: always paths: - ci/diagnostics/sonobuoy/ - tags: !reference [.prepare_spawn_test_cleanup_k8s_cluster, tags] - -# Spawn a cluster and run sonobuoy end-to-end tests in conformance mode -sonobuoy-tests: - environment: - name: spawn-cluster + retry: 1 # because this test lasts hours and could block the CI for a significant amount of time rules: # test is scheduled - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_REF_NAME == $YAOOK_K8S_DEFAULT_BRANCH @@ -45,19 +55,4 @@ sonobuoy-tests: when: on_success - when: manual allow_failure: true - extends: .test_with_sonobuoy - stage: diagnostic-tools - retry: 1 # because this test lasts hours and could block the CI for a significant 
amount of time - variables: - KUBECONFIG: etc/admin.conf - # the variables defined in .test_upgrade_path_k8s_cluster get overwritten, - # so we need to reference the default vars here (only one level of nesting is allowed) - WG_COMPANY_USERS: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, WG_COMPANY_USERS] - K8S_CUSTOM_STAGE_USAGE: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, K8S_CUSTOM_STAGE_USAGE] - ANSIBLE_ANY_ERRORS_FATAL: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ANSIBLE_ANY_ERRORS_FATAL] - ANSIBLE_FORCE_COLOR: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ANSIBLE_FORCE_COLOR] - MANAGED_K8S_LATEST_RELEASE: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, MANAGED_K8S_LATEST_RELEASE] - ci_vault_config_hcl: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_vault_config_hcl] - ci_cert_vaultchain_crt: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vaultchain_crt] - ci_cert_vault_key: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vault_key] - ci_cert_vaultca_crt: !reference [.prepare_spawn_test_cleanup_k8s_cluster, variables, ci_cert_vaultca_crt] + tags: !reference [.prepare_spawn_test_cleanup_k8s_cluster, tags] diff --git a/docs/_releasenotes/1125.chore.sonobuy b/docs/_releasenotes/1125.chore.sonobuy new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391