From b4a2aa827554621e231c96a0086298402c2a907c Mon Sep 17 00:00:00 2001 From: Igor Drozdov Date: Tue, 15 Oct 2024 14:20:23 +0200 Subject: [PATCH] feat(evals): evaluate code suggestions --- .gitlab-ci.yml | 103 --------------------- .gitlab/ci/eval.gitlab-ci.yml | 21 +++-- eli5/cli/codesuggestions/evaluate.py | 7 ++ eli5/codesuggestions/clients/ai_gateway.py | 7 +- eli5/codesuggestions/evaluate.py | 4 +- 5 files changed, 27 insertions(+), 115 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dc4895ad..b6af9d3b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,112 +7,9 @@ stages: - renovate_bot - datasets -include: - - template: Workflows/MergeRequest-Pipelines.gitlab-ci.yml - - # Upgrades dependencies on a schedule - # see https://gitlab.com/gitlab-com/gl-infra/common-ci-tasks/-/blob/main/renovate-bot.md - - project: "gitlab-com/gl-infra/common-ci-tasks" - ref: v2.41.1 # renovate:managed - file: renovate-bot.yml - -variables: - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" - POETRY_CACHE_DIR: "$CI_PROJECT_DIR/.cache/poetry" - GIT_LFS_SKIP_SMUDGE: 1 # Prevent Git LFS from automatically downloading large files - -cache: - key: - files: - - poetry.lock - - .gitlab-ci.yml - paths: - - $PIP_CACHE_DIR - - $POETRY_CACHE_DIR - - requirements.txt - -.poetry: - before_script: - - pip install poetry==1.8.3 - - poetry config virtualenvs.in-project true - - poetry config cache-dir ${POETRY_CACHE_DIR} - - poetry export -f requirements.txt --output requirements.txt --without-hashes - - poetry config --list - -############## -# Conditions # -############## -.if-merge-request: &if-merge-request - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - -.if-default-branch: &if-default-branch - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH" - -.if-renovate-schedule: &if-renovate-schedule - if: '$CI_PIPELINE_SOURCE == "schedule" && $RENOVATE_SCHEDULED' - -.code-changes: &code-changes - - "**/*.{py}" - - ".gitlab-ci.yml" - - ".gitlab/ci/**/*" - - "Makefile" - - "poetry.lock" 
- -######### -# Rules # -######### -.rules:code-changes: - rules: - - <<: *if-renovate-schedule - when: never - - <<: *if-merge-request - changes: *code-changes - - <<: *if-default-branch - changes: *code-changes - -.rules:run-evaluations: - rules: - - <<: *if-default-branch - run_evaluations: - extends: - - .rules:run-evaluations stage: eval needs: [] trigger: include: - local: .gitlab/ci/eval.gitlab-ci.yml - -lint: - extends: - - .poetry - - .rules:code-changes - stage: lint - script: - - make lint - - poetry lock --no-update - - git diff --exit-code - after_script: - - | - # Hint for fixing issues - MAGENTA=$(printf '\e[35m') - BOLD=$(printf '\e[1m') - RESET=$(printf '\e[0m') - echo "${MAGENTA}Run ${BOLD}make format${RESET}${MAGENTA} to fix formatting issues.${RESET}" - -langsmith:pull: - extends: - - .poetry - stage: datasets - variables: - GITLAB_BASE_URL: https://gitlab.com - GITLAB_PRIVATE_TOKEN: $GITLAB_PRIVATE_TOKEN - GITLAB_TOKEN_NAME: langsmith-dataset-sync - needs: [] - when: manual - script: - - apt update && apt install git-lfs - - git lfs pull --include "datasets/synced/*" - - | - git remote set-url --push origin https://$GITLAB_TOKEN_NAME:$GITLAB_PRIVATE_TOKEN@gitlab.com/gitlab-org/ai-powered/eli5.git - - poetry install && poetry run eli5 datasets sync datasets/synced --create-mr diff --git a/.gitlab/ci/eval.gitlab-ci.yml b/.gitlab/ci/eval.gitlab-ci.yml index 9171440a..51cb516e 100644 --- a/.gitlab/ci/eval.gitlab-ci.yml +++ b/.gitlab/ci/eval.gitlab-ci.yml @@ -2,6 +2,7 @@ variables: GOOGLE_APPLICATION_CREDENTIALS: "/root/gcloud-service-key.json" GOOGLE_CREDENTIALS: $GOOGLE_CREDENTIALS GOOGLE_PROJECT: $GOOGLE_PROJECT + CI_DEBUG_SERVICES: "true" .setup-gcloud: &setup-gcloud - printf "%s" "$GOOGLE_CREDENTIALS" > "$GOOGLE_APPLICATION_CREDENTIALS" @@ -19,9 +20,7 @@ variables: services: - name: registry.gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/model-gateway:${AI_GATEWAY_TAG} alias: ai-gateway - - name: 
registry.gitlab.com/gitlab-org/ai-powered/custom-models/evaluations/gcloud-ollama - alias: gcloud-ollama - image: google/cloud-sdk:alpine + image: google/cloud-sdk:485.0.0-alpine needs: [] when: manual before_script: @@ -35,16 +34,20 @@ variables: code-suggestions: extends: - .setup + parallel: + matrix: + - MODEL_NAME: [mistral, codegemma, codellama, codestral] script: - echo "Running code ${CODE_SUGGESTIONS_INTENT}s evaluations" - poetry run eli5 code-suggestions evaluate - --dataset=$DATASET - --source=$SOURCE - --limit=$LIMIT - --experiment-prefix="code-suggestions-${CODE_SUGGESTIONS_INTENT}-${MODEL_NAME}" + --dataset=code_generation_mbpp_all_sanitized + --source=ai_gateway + --experiment-prefix="code-suggestions-${MODEL_NAME}" --model-name=$MODEL_NAME - --model-endpoint=$MODEL_ENDPOINT - --intent=$CODE_SUGGESTIONS_INTENT + --model-provider="ollama_chat/${MODEL_NAME}" + --model-endpoint=http://34.42.191.31 + --model-api-key=$MODEL_API_KEY + --intent=generation prompts: extends: diff --git a/eli5/cli/codesuggestions/evaluate.py b/eli5/cli/codesuggestions/evaluate.py index 9d8c78d2..e542dd06 100644 --- a/eli5/cli/codesuggestions/evaluate.py +++ b/eli5/cli/codesuggestions/evaluate.py @@ -95,6 +95,12 @@ def evaluate( help="Send model requests to a specific endpoint; this only applies if source=ai_gateway" ), ] = None, + model_api_key: Annotated[ Optional[str], typer.Option( help="Use a specific API key for model requests; this only applies if source=ai_gateway" ), ] = None, ): client: Client = ctx.obj.langsmith_client @@ -114,5 +120,6 @@ def evaluate( model_name=model_name, model_provider=model_provider, model_endpoint=model_endpoint, + model_api_key=model_api_key, ) print(results) diff --git a/eli5/codesuggestions/clients/ai_gateway.py b/eli5/codesuggestions/clients/ai_gateway.py index 9d06599d..e17b466d 100644 --- a/eli5/codesuggestions/clients/ai_gateway.py +++ b/eli5/codesuggestions/clients/ai_gateway.py @@ -22,11 +22,14 @@ def
ai_gateway_code_suggestion(file_name, content_above_cursor, content_below_cu "content_above_cursor": content_above_cursor, "content_below_cursor": content_below_cursor, }, + "prompt": "", "prompt_version": 2, - "prompt": None, - "model_provider": parameters.get("model_provider", "litellm"), + "prompt_id": "code_suggestions/generations", + "model_identifier": parameters.get("model_identifier", "litellm"), + "model_provider": "litellm", "model_endpoint": parameters.get("model_endpoint"), "model_name": parameters.get("model_name"), + "model_api_key": parameters.get("model_api_key"), } if intent == "generations": diff --git a/eli5/codesuggestions/evaluate.py b/eli5/codesuggestions/evaluate.py index f34b7420..fd5abf5a 100644 --- a/eli5/codesuggestions/evaluate.py +++ b/eli5/codesuggestions/evaluate.py @@ -76,6 +76,7 @@ def evaluate( model_name: Optional[str] = None, model_provider: Optional[str] = None, model_endpoint: Optional[str] = None, + model_api_key: Optional[str] = None, ): get_code_suggestion = CODE_SUGGESTION_PROVIDERS.get(code_suggestions_source) @@ -92,8 +93,9 @@ def evaluate( { "intent": intent, "model_name": model_name, - "model_provider": model_provider, + "model_identifier": model_provider, "model_endpoint": model_endpoint, + "model_api_key": model_api_key, } ) elif code_suggestions_source == "fireworks": -- GitLab