feat: Add bot and benchmark filtering to skia_export
This change introduces new flags `--bot_to_filter` and
`--benchmark_to_filter` to the skia_export pipeline. This allows for
more targeted data exports, which is useful for debugging and for
preventing the backfill process from being overwhelmed with too much data.
The README.md has been updated to document these new flags.
Additionally, .gitignore is updated to ignore skia_export.egg-info
directories.
Bug: chromium:439977560
Change-Id: I0e95d120798b7c08efdd604083fc595bfa320c95
Reviewed-on: https://chromium-review.googlesource.com/c/catapult/+/7018141
Reviewed-by: Wenbin Zhang <wenbinzhang@google.com>
Commit-Queue: Eduardo Yap <eduardoyap@google.com>
diff --git a/.gitignore b/.gitignore
index 8f28dbc..18c8cff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,6 @@
# Node dependency download folders
node_modules
+
+# Ignore Python package build artifacts
+skia_export/skia_export.egg-info/
diff --git a/dashboard/skia_export/README.md b/dashboard/skia_export/README.md
index d0bf0a2..246cb5c 100644
--- a/dashboard/skia_export/README.md
+++ b/dashboard/skia_export/README.md
@@ -57,6 +57,23 @@
--repo_to_export=v8
```
+### Testing on a Specific Bot or Benchmark
+
+To narrow down your test to a specific bot or benchmark, you can use the following flags. This is useful for debugging data from a particular test configuration.
+
+For filtering by bot:
+
+```bash
+--bot_to_filter=linux-perf
+```
+
+For filtering by benchmark:
+```bash
+--benchmark_to_filter=speedometer
+```
+
+You can use these flags in combination with the time range and repository filters.
+
## Updating Templates
Once your changes have been tested and merged, you need to update the Dataflow template so that your changes are reflected in production. Run the following command:
@@ -90,7 +107,7 @@
--subnetwork=regions/us-central1/subnetworks/dashboard-batch \
--worker-machine-type=e2-standard-4 \
--project=chromeperf \
- --parameters=start_time=202304250000,end_time=202304260000,repo_to_export=fuchsia
+ --parameters=start_time=202304250000,end_time=202304260000,repo_to_export=fuchsia,bot_to_filter=linux-perf
```
-Make sure you set `start_time` and `end_time` to the desired range in `YYYYMMDDHHmm` format. You can also change the `repo_to_export` parameter.
+Make sure you set `start_time` and `end_time` to the desired range in `YYYYMMDDHHmm` format. You can also specify `repo_to_export`, `bot_to_filter`, and `benchmark_to_filter` as needed.
diff --git a/dashboard/skia_export/skia_export/export_options.py b/dashboard/skia_export/skia_export/export_options.py
index 61ffd4c..c07661b 100644
--- a/dashboard/skia_export/skia_export/export_options.py
+++ b/dashboard/skia_export/skia_export/export_options.py
@@ -20,6 +20,8 @@
start_time: ValueProvider for the start of the time range to fetch.
testing: ValueProvider to enable/disable testing mode (no GCS upload).
repo_to_export: Specific repository to export (e.g., chromium, webrtc).
+ bot_to_filter: Specific bot to filter for.
+ benchmark_to_filter: Specific benchmark to filter for.
"""
@classmethod
@@ -46,6 +48,18 @@
'GCS Buckets.'),
default='all',
)
+ parser.add_value_provider_argument(
+ '--bot_to_filter',
+ help=('Specify a bot name to filter for (e.g., "linux-perf"). '
+ 'If not specified, all bots are included.'),
+ default='all',
+ )
+ parser.add_value_provider_argument(
+ '--benchmark_to_filter',
+ help=('Specify a benchmark name to filter for (e.g., "speedometer"). '
+ 'If not specified, all benchmarks are included.'),
+ default='all',
+ )
def GetTimeRangeProvider(self):
"""Return an object with .Get() method that returns (start, end) tuple."""
diff --git a/dashboard/skia_export/skia_export/skia_pipeline.py b/dashboard/skia_export/skia_export/skia_pipeline.py
index 9453c34..1a9418f 100644
--- a/dashboard/skia_export/skia_export/skia_pipeline.py
+++ b/dashboard/skia_export/skia_export/skia_pipeline.py
@@ -47,7 +47,6 @@
# 'ChromiumPerfPGO', 'ChromiumPerf',
],
'public_bucket_name': 'chrome-perf-public',
- # 'public_bucket_name': None,
'internal_bucket_name': 'chrome-perf-non-public',
# 'internal_bucket_name': 'chrome-perf-non-public-secondary',
'ingest_folder': 'ingest',
@@ -171,30 +170,19 @@
"""
row_entities_read.inc()
try:
- # UNCOMMENT TO ADD FILTERS THAT ARE NOT MASTERS
- # # --- START NEW FILTER LOGIC ---
test_path_parts = entity.key.parent.name.split('/')
+ bot_to_filter = export_options.bot_to_filter.get()
+ benchmark_to_filter = export_options.benchmark_to_filter.get()
- # # 1. Check Bot/Benchmark filter
- # bot_name = test_path_parts[1] if len(test_path_parts) > 1 else None
- # benchmark_name = test_path_parts[2] if len(test_path_parts) > 2 else None
+ if bot_to_filter != 'all':
+ bot_name = test_path_parts[1] if len(test_path_parts) > 1 else None
+ if bot_name != bot_to_filter:
+ return [] # Fails Bot filter
- # if not (benchmark_name == 'startup.mobile'):
- # return [] # Fails Bot/Benchmark filter
-
- # # 2. Check Underscore filter
- # subtest_1 = test_path_parts[4] if len(test_path_parts) > 4 else None
- # subtest_2 = test_path_parts[5] if len(test_path_parts) > 5 else None
-
- # has_underscore = False
- # if subtest_1 and '_' in subtest_1:
- # has_underscore = True
- # if not has_underscore and subtest_2 and '_' in subtest_2:
- # has_underscore = True
-
- # if not has_underscore:
- # return [] # Fails Underscore filter
- # # --- END NEW FILTER LOGIC ---
+ if benchmark_to_filter != 'all':
+ benchmark_name = test_path_parts[2] if len(test_path_parts) > 2 else None
+ if benchmark_name != benchmark_to_filter:
+ return [] # Fails Benchmark filter
  # If we get here, the row passed both filters. Proceed.
d = {