From 2126fe018d145c0a574103b0b507f0ed32b7a983 Mon Sep 17 00:00:00 2001
From: Nathan Weinshenker <nweinshenker@gitlab.com>
Date: Thu, 24 Oct 2024 12:45:50 +0200
Subject: [PATCH 1/4] Add offset/limit pagination to the LangSmith docs dataset evaluation

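Add `offset` and `limit` options to the `docs` evaluation command so that
only a slice of the dataset is evaluated. The slicing is done client-side
with `itertools.islice` over the examples returned by `list_examples`.

We should probably ask the LangSmith team whether pagination will be
implemented on their side, but it is easy enough to do here in the meantime.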
---
 eli5/cli/duochat/evaluate_v1.py | 19 +++++++++++++++++--
 eli5/duochat/evaluation/docs.py | 18 +++++++++++++++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py
index 89b5b25b..0eec83a0 100644
--- a/eli5/cli/duochat/evaluate_v1.py
+++ b/eli5/cli/duochat/evaluate_v1.py
@@ -35,7 +35,7 @@ LangSmith client, which can be reused by the command.
 """
 
 from pathlib import Path
-from typing import Annotated
+from typing import Annotated, Optional
 
 import typer
 
@@ -58,8 +58,23 @@ def docs(
             " containing examples with context, questions, and possible answers.",
         ),
     ] = "duo_chat.cot_qa_docs.1",
+    offset: Annotated[
+        Optional[int],
+        typer.Option(
+            show_default=True,
+            help="The number of dataset rows to skip before starting the evaluation."
+        )
+    ] = 0,
+    limit: Annotated[
+        Optional[int],
+        typer.Option(
+            show_default=False,
+            help="The maximum number of dataset rows to evaluate after the offset."
+                " If not set, all remaining rows will be evaluated."
+        )
+    ] = 0,
 ):
-    duochat.evaluation.docs(ctx.obj.langsmith_client, dataset)
+    duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset)
 
 
 @app.command(help="Evaluate DuoChat's accuracy on resource-related question answering using LangSmith.")
diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py
index 9bb20e21..40acc6ca 100644
--- a/eli5/duochat/evaluation/docs.py
+++ b/eli5/duochat/evaluation/docs.py
@@ -1,4 +1,6 @@
 from datetime import datetime
+from itertools import islice
+from typing import Optional
 
 from langchain_anthropic import ChatAnthropic
 from langsmith import Client
@@ -16,13 +18,15 @@ def _predict(inputs: dict) -> dict:
     return {"actual_answer": actual_answer}
 
 
-def docs(client: Client, dataset: str):
+def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[int]):
     """
     Evaluate the accuracy of DuoChat answers against a given dataset using LangSmith.
 
     Args:
         client (Client): A LangSmith Client instance for interacting with the LangSmith API.
         dataset (str): The name of the dataset to use for evaluation compatible with the format of 'duo_chat.cot_qa_docs.1'.
+        limit Optional(int): The number of experiments to run against the dataset. Defaults to running the entire dataset.
+        offset Optional(int): The number of dataset rows to skip before starting the evaluation.
 
     The function uses a custom prediction function (_predict) that generates answers
     using DuoChat. It then evaluates these answers using the
@@ -56,6 +60,14 @@ def docs(client: Client, dataset: str):
     prefix = f"Run {dataset} on GDK on {time}"
 
     data = client.list_examples(dataset_name=dataset)
+    
+    data_with_offset = islice(data, offset, None)
 
-    # Run LangSmith evaluation.
-    ls_evaluate(_predict, data=data, evaluators=evaluators, client=client, experiment_prefix=prefix)
+    # Apply limit if set
+    if limit is not None:
+        paginated_data = islice(data_with_offset, limit)
+    else:
+        paginated_data = data_with_offset
+
+
+    ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix)
-- 
GitLab


From cf030dcd62336bd60b2794b5d31000454f9c2ab2 Mon Sep 17 00:00:00 2001
From: Nathan Weinshenker <nweinshenker@gitlab.com>
Date: Thu, 24 Oct 2024 13:42:57 +0200
Subject: [PATCH 2/4] Fix formatting issue

---
 eli5/cli/duochat/evaluate_v1.py | 9 ++++-----
 eli5/duochat/evaluation/docs.py | 3 +--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py
index 0eec83a0..f25fb9b0 100644
--- a/eli5/cli/duochat/evaluate_v1.py
+++ b/eli5/cli/duochat/evaluate_v1.py
@@ -61,17 +61,16 @@ def docs(
     offset: Annotated[
         Optional[int],
         typer.Option(
-            show_default=True,
-            help="The number of dataset rows to skip before starting the evaluation."
-        )
+            show_default=True, help="The number of dataset rows to skip before starting the evaluation."
+        ),
     ] = 0,
     limit: Annotated[
         Optional[int],
         typer.Option(
             show_default=False,
             help="The maximum number of dataset rows to evaluate after the offset."
-                " If not set, all remaining rows will be evaluated."
-        )
+            " If not set, all remaining rows will be evaluated.",
+        ),
     ] = 0,
 ):
     duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset)
diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py
index 40acc6ca..6086c2b4 100644
--- a/eli5/duochat/evaluation/docs.py
+++ b/eli5/duochat/evaluation/docs.py
@@ -60,7 +60,7 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in
     prefix = f"Run {dataset} on GDK on {time}"
 
     data = client.list_examples(dataset_name=dataset)
-    
+
     data_with_offset = islice(data, offset, None)
 
     # Apply limit if set
@@ -69,5 +69,4 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in
     else:
         paginated_data = data_with_offset
 
-
     ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix)
-- 
GitLab


From ceaa8ce0fbbbf93d1e2a15d3f87b3ea1e6abf612 Mon Sep 17 00:00:00 2001
From: Nathan Weinshenker <nweinshenker@gitlab.com>
Date: Thu, 24 Oct 2024 13:55:56 +0200
Subject: [PATCH 3/4] Default the limit option to None instead of 0

---
 eli5/cli/duochat/evaluate_v1.py | 2 +-
 eli5/duochat/evaluation/docs.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/eli5/cli/duochat/evaluate_v1.py b/eli5/cli/duochat/evaluate_v1.py
index f25fb9b0..4c947850 100644
--- a/eli5/cli/duochat/evaluate_v1.py
+++ b/eli5/cli/duochat/evaluate_v1.py
@@ -71,7 +71,7 @@ def docs(
             help="The maximum number of dataset rows to evaluate after the offset."
             " If not set, all remaining rows will be evaluated.",
         ),
-    ] = 0,
+    ] = None,
 ):
     duochat.evaluation.docs(ctx.obj.langsmith_client, dataset, limit, offset)
 
diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py
index 6086c2b4..956437de 100644
--- a/eli5/duochat/evaluation/docs.py
+++ b/eli5/duochat/evaluation/docs.py
@@ -68,5 +68,5 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in
         paginated_data = islice(data_with_offset, limit)
     else:
         paginated_data = data_with_offset
-
+    
     ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix)
-- 
GitLab


From 1becde53b5761d2d32e0cb27e2423e2f74234234 Mon Sep 17 00:00:00 2001
From: Nathan Weinshenker <nweinshenker@gitlab.com>
Date: Thu, 24 Oct 2024 14:11:09 +0200
Subject: [PATCH 4/4] Remove trailing whitespace in docs.py

---
 eli5/duochat/evaluation/docs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eli5/duochat/evaluation/docs.py b/eli5/duochat/evaluation/docs.py
index 956437de..6086c2b4 100644
--- a/eli5/duochat/evaluation/docs.py
+++ b/eli5/duochat/evaluation/docs.py
@@ -68,5 +68,5 @@ def docs(client: Client, dataset: str, limit: Optional[int], offset: Optional[in
         paginated_data = islice(data_with_offset, limit)
     else:
         paginated_data = data_with_offset
-    
+
     ls_evaluate(_predict, data=paginated_data, evaluators=evaluators, client=client, experiment_prefix=prefix)
-- 
GitLab