diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dc4895ad38d52447e2c8869f9c9741f71779cc6a..b6af9d3b6c9ffd1232595194b4231456a03bd895 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,112 +7,9 @@ stages: - renovate_bot - datasets -include: - - template: Workflows/MergeRequest-Pipelines.gitlab-ci.yml - - # Upgrades dependencies on a schedule - # see https://gitlab.com/gitlab-com/gl-infra/common-ci-tasks/-/blob/main/renovate-bot.md - - project: "gitlab-com/gl-infra/common-ci-tasks" - ref: v2.41.1 # renovate:managed - file: renovate-bot.yml - -variables: - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" - POETRY_CACHE_DIR: "$CI_PROJECT_DIR/.cache/poetry" - GIT_LFS_SKIP_SMUDGE: 1 # Prevent Git LFS from automatically downloading large files - -cache: - key: - files: - - poetry.lock - - .gitlab-ci.yml - paths: - - $PIP_CACHE_DIR - - $POETRY_CACHE_DIR - - requirements.txt - -.poetry: - before_script: - - pip install poetry==1.8.3 - - poetry config virtualenvs.in-project true - - poetry config cache-dir ${POETRY_CACHE_DIR} - - poetry export -f requirements.txt --output requirements.txt --without-hashes - - poetry config --list - -############## -# Conditions # -############## -.if-merge-request: &if-merge-request - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - -.if-default-branch: &if-default-branch - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH" - -.if-renovate-schedule: &if-renovate-schedule - if: '$CI_PIPELINE_SOURCE == "schedule" && $RENOVATE_SCHEDULED' - -.code-changes: &code-changes - - "**/*.{py}" - - ".gitlab-ci.yml" - - ".gitlab/ci/**/*" - - "Makefile" - - "poetry.lock" - -######### -# Rules # -######### -.rules:code-changes: - rules: - - <<: *if-renovate-schedule - when: never - - <<: *if-merge-request - changes: *code-changes - - <<: *if-default-branch - changes: *code-changes - -.rules:run-evaluations: - rules: - - <<: *if-default-branch - run_evaluations: - extends: - - .rules:run-evaluations stage: eval needs: [] trigger: include: - local: .gitlab/ci/eval.gitlab-ci.yml - -lint: - extends: - - .poetry - - .rules:code-changes - stage: lint - script: - - make lint - - poetry lock --no-update - - git diff --exit-code - after_script: - - | - # Hint for fixing issues - MAGENTA=$(printf '\e[35m') - BOLD=$(printf '\e[1m') - RESET=$(printf '\e[0m') - echo "${MAGENTA}Run ${BOLD}make format${RESET}${MAGENTA} to fix formatting issues.${RESET}" - -langsmith:pull: - extends: - - .poetry - stage: datasets - variables: - GITLAB_BASE_URL: https://gitlab.com - GITLAB_PRIVATE_TOKEN: $GITLAB_PRIVATE_TOKEN - GITLAB_TOKEN_NAME: langsmith-dataset-sync - needs: [] - when: manual - script: - - apt update && apt install git-lfs - - git lfs pull --include "datasets/synced/*" - - | - git remote set-url --push origin https://$GITLAB_TOKEN_NAME:$GITLAB_PRIVATE_TOKEN@gitlab.com/gitlab-org/ai-powered/eli5.git - - poetry install && poetry run eli5 datasets sync datasets/synced --create-mr diff --git a/.gitlab/ci/eval.gitlab-ci.yml b/.gitlab/ci/eval.gitlab-ci.yml index 9171440a45871bb720f36f668c3ce88ef2891a5a..51cb516e470978fb6e7fa49351703bf37702440f 100644 --- a/.gitlab/ci/eval.gitlab-ci.yml +++ b/.gitlab/ci/eval.gitlab-ci.yml @@ -2,6 +2,7 @@ variables: GOOGLE_APPLICATION_CREDENTIALS: "/root/gcloud-service-key.json" GOOGLE_CREDENTIALS: $GOOGLE_CREDENTIALS GOOGLE_PROJECT: $GOOGLE_PROJECT + CI_DEBUG_SERVICES: "true" .setup-gcloud: &setup-gcloud - printf "%s" "$GOOGLE_CREDENTIALS" > "$GOOGLE_APPLICATION_CREDENTIALS" @@ -19,9 +20,7 @@ variables: services: - name: registry.gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/model-gateway:${AI_GATEWAY_TAG} alias: ai-gateway - - name: registry.gitlab.com/gitlab-org/ai-powered/custom-models/evaluations/gcloud-ollama - alias: gcloud-ollama - image: google/cloud-sdk:alpine + image: google/cloud-sdk:485.0.0-alpine needs: [] when: manual before_script: @@ -35,16 +34,20 @@ variables: code-suggestions: extends: - .setup + parallel: + matrix: + - MODEL_NAME: [mistral, codegemma, codellama, codestral] script: - echo "Running code ${CODE_SUGGESTIONS_INTENT}s evaluations" - poetry run eli5 code-suggestions evaluate - --dataset=$DATASET - --source=$SOURCE - --limit=$LIMIT - --experiment-prefix="code-suggestions-${CODE_SUGGESTIONS_INTENT}-${MODEL_NAME}" + --dataset=code_generation_mbpp_all_sanitized + --source=ai_gateway + --experiment-prefix="code-suggestions-${MODEL_NAME}" --model-name=$MODEL_NAME - --model-endpoint=$MODEL_ENDPOINT - --intent=$CODE_SUGGESTIONS_INTENT + --model-provider="ollama_chat/${MODEL_NAME}" + --model-endpoint=http://34.42.191.31 + --model-api-key=$MODEL_API_KEY + --intent=generation prompts: extends: diff --git a/eli5/cli/codesuggestions/evaluate.py b/eli5/cli/codesuggestions/evaluate.py index 9d8c78d2dc0652061fc2d9b5583de1d5353862a1..e542dd06b2d2909bd529b3531f0f535056af15b6 100644 --- a/eli5/cli/codesuggestions/evaluate.py +++ b/eli5/cli/codesuggestions/evaluate.py @@ -95,6 +95,12 @@ def evaluate( help="Send model requests to a specific endpoint; this only applies if source=ai_gateway" ), ] = None, + model_api_key: Annotated[ + Optional[str], + typer.Option( + help="Send model requests to a specific api key; this only applies if source=ai_gateway" + ), + ] = None, ): client: Client = ctx.obj.langsmith_client @@ -114,5 +120,6 @@ def evaluate( model_name=model_name, model_provider=model_provider, model_endpoint=model_endpoint, + model_api_key=model_api_key, ) print(results) diff --git a/eli5/codesuggestions/clients/ai_gateway.py b/eli5/codesuggestions/clients/ai_gateway.py index 9d06599dde575d7409b9f3b532e6c7d2d56263fe..e17b466d348ed0155d3a6e8e079ee891c8933940 100644 --- a/eli5/codesuggestions/clients/ai_gateway.py +++ b/eli5/codesuggestions/clients/ai_gateway.py @@ -22,11 +22,14 @@ def ai_gateway_code_suggestion(file_name, content_above_cursor, content_below_cu "content_above_cursor": content_above_cursor, "content_below_cursor": content_below_cursor, }, + "prompt": "", "prompt_version": 2, - "prompt": None, - "model_provider": parameters.get("model_provider", "litellm"), + "prompt_id": "code_suggestions/generations", + "model_identifier": parameters.get("model_identifier", "litellm"), + "model_provider": "litellm", "model_endpoint": parameters.get("model_endpoint"), "model_name": parameters.get("model_name"), + "model_api_key": parameters.get("model_api_key"), } if intent == "generations": diff --git a/eli5/codesuggestions/evaluate.py b/eli5/codesuggestions/evaluate.py index f34b742052f2a6f6327223359124291d25347c87..fd5abf5aaf293b1c3fdfa937b97e7a74996b9363 100644 --- a/eli5/codesuggestions/evaluate.py +++ b/eli5/codesuggestions/evaluate.py @@ -76,6 +76,7 @@ def evaluate( model_name: Optional[str] = None, model_provider: Optional[str] = None, model_endpoint: Optional[str] = None, + model_api_key: Optional[str] = None, ): get_code_suggestion = CODE_SUGGESTION_PROVIDERS.get(code_suggestions_source) @@ -92,8 +93,9 @@ def evaluate( { "intent": intent, "model_name": model_name, - "model_provider": model_provider, + "model_identifier": model_provider, "model_endpoint": model_endpoint, + "model_api_key": model_api_key, } ) elif code_suggestions_source == "fireworks":