From 2126fe018d145c0a574103b0b507f0ed32b7a983 Mon Sep 17 00:00:00 2001 From: Nathan Weinshenker Date: Thu, 24 Oct 2024 12:45:50 +0200 Subject: [PATCH 1/4] Pagination added to the following LangSmith dataset We should probably ask the following LangSmith group about whether this will be implemented but it's pretty easy in general --- eli5/cli/duochat/evaluate_v1.py | 19 +++++++++++++++++-- eli5/duochat/evaluation/docs.py | 18 +++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py index 89b5b25b..0eec83a0 100644 --- a/eli5/cli/duochat/evaluate_v1.py +++ b/eli5/cli/duochat/evaluate_v1.py @@ -35,7 +35,7 @@ LangSmith client, which can be reused by the command. """ from pathlib import Path -from typing import Annotated +from typing import Annotated, Optional import typer @@ -58,8 +58,23 @@ def docs( " containing examples with context, questions, and possible answers.", ), ] = "duo_chat.cot_qa_docs.1", + offset: Annotated[ + Optional[int], + typer.Option( + show_default=True, + help="The number of dataset rows to skip before starting the evaluation." + ) + ] = 0, + limit: Annotated[ + Optional[int], + typer.Option( + show_default=False, + help="The maximum number of dataset rows to evaluate after the offset." + " If not set, all remaining rows will be evaluated." 
+ ) + ] = 0, ): - duochat.evaluation.docs(ctx.obj.langsmith_client, dataset) + duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset) @app.command(help="Evaluate DuoChat's accuracy on resource-related question answering using LangSmith.") diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py index 9bb20e21..40acc6ca 100644 --- a/eli5/duochat/evaluation/docs.py +++ b/eli5/duochat/evaluation/docs.py @@ -1,4 +1,6 @@ from datetime import datetime +from itertools import islice +from typing import Optional from langchain_anthropic import ChatAnthropic from langsmith import Client @@ -16,13 +18,15 @@ def _predict(inputs: dict) -> dict: return {"actual_answer": actual_answer} -def docs(client: Client, dataset: str): +def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[int]): """ Evaluate the accuracy of DuoChat answers against a given dataset using LangSmith. Args: client (Client): A LangSmith Client instance for interacting with the LangSmith API. dataset (str): The name of the dataset to use for evaluation compatible with the format of 'duo_chat.cot_qa_docs.1'. + limit (Optional[int]): The maximum number of dataset examples to evaluate after the offset. If None, all remaining examples are evaluated. + offset (Optional[int]): The number of dataset examples to skip before starting the evaluation. The function uses a custom prediction function (_predict) that generates answers using DuoChat. It then evaluates these answers using the @@ -56,6 +60,14 @@ def docs(client: Client, dataset: str): prefix = f"Run {dataset} on GDK on {time}" data = client.list_examples(dataset_name=dataset) + + data_with_offset = islice(data, offset, None) - # Run LangSmith evaluation. 
- ls_evaluate(_predict, data=data, evaluators=evaluators, client=client, experiment_prefix=prefix) + # Apply limit if set + if limit is not None: + paginated_data = islice(data_with_offset, limit) + else: + paginated_data = data_with_offset + + + ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix) -- GitLab From cf030dcd62336bd60b2794b5d31000454f9c2ab2 Mon Sep 17 00:00:00 2001 From: Nathan Weinshenker Date: Thu, 24 Oct 2024 13:42:57 +0200 Subject: [PATCH 2/4] Fix formatting issue --- eli5/cli/duochat/evaluate_v1.py | 9 ++++----- eli5/duochat/evaluation/docs.py | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py index 0eec83a0..f25fb9b0 100644 --- a/eli5/cli/duochat/evaluate_v1.py +++ b/eli5/cli/duochat/evaluate_v1.py @@ -61,17 +61,16 @@ def docs( offset: Annotated[ Optional[int], typer.Option( - show_default=True, - help="The number of dataset rows to skip before starting the evaluation." - ) + show_default=True, help="The number of dataset rows to skip before starting the evaluation." + ), ] = 0, limit: Annotated[ Optional[int], typer.Option( show_default=False, help="The maximum number of dataset rows to evaluate after the offset." - " If not set, all remaining rows will be evaluated." 
- ) + " If not set, all remaining rows will be evaluated.", + ), ] = 0, ): duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset) diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py index 40acc6ca..6086c2b4 100644 --- a/eli5/duochat/evaluation/docs.py +++ b/eli5/duochat/evaluation/docs.py @@ -60,7 +60,7 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in prefix = f"Run {dataset} on GDK on {time}" data = client.list_examples(dataset_name=dataset) - + data_with_offset = islice(data, offset, None) # Apply limit if set @@ -69,5 +69,4 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in else: paginated_data = data_with_offset - ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix) -- GitLab From ceaa8ce0fbbbf93d1e2a15d3f87b3ea1e6abf612 Mon Sep 17 00:00:00 2001 From: Nathan Weinshenker Date: Thu, 24 Oct 2024 13:55:56 +0200 Subject: [PATCH 3/4] One last commit change --- eli5/cli/duochat/evaluate_v1.py | 2 +- eli5/duochat/evaluation/docs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py index f25fb9b0..4c947850 100644 --- a/eli5/cli/duochat/evaluate_v1.py +++ b/eli5/cli/duochat/evaluate_v1.py @@ -71,7 +71,7 @@ def docs( help="The maximum number of dataset rows to evaluate after the offset." 
" If not set, all remaining rows will be evaluated.", ), - ] = 0, + ] = None, ): duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset) diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py index 6086c2b4..956437de 100644 --- a/eli5/duochat/evaluation/docs.py +++ b/eli5/duochat/evaluation/docs.py @@ -68,5 +68,5 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in paginated_data = islice(data_with_offset, limit) else: paginated_data = data_with_offset - + ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix) -- GitLab From 1becde53b5761d2d32e0cb27e2423e2f74234234 Mon Sep 17 00:00:00 2001 From: Nathan Weinshenker Date: Thu, 24 Oct 2024 14:11:09 +0200 Subject: [PATCH 4/4] Fix stuff --- eli5/duochat/evaluation/docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py index 956437de..6086c2b4 100644 --- a/eli5/duochat/evaluation/docs.py +++ b/eli5/duochat/evaluation/docs.py @@ -68,5 +68,5 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in paginated_data = islice(data_with_offset, limit) else: paginated_data = data_with_offset - + ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix) -- GitLab