feat: Add bot and benchmark filtering to skia_export
This change introduces new flags `--bot_to_filter` and
`--benchmark_to_filter` to the skia_export pipeline. This allows for
more targeted data exports, which is useful for debugging and for
preventing the backfill process from being overwhelmed with too much data.
The README.md has been updated to document these new flags.
Additionally, .gitignore is updated to ignore skia_export.egg-info
directories.
Bug: chromium:439977560
Change-Id: I0e95d120798b7c08efdd604083fc595bfa320c95
Reviewed-on: https://chromium-review.googlesource.com/c/catapult/+/7018141
Reviewed-by: Wenbin Zhang <wenbinzhang@google.com>
Commit-Queue: Eduardo Yap <eduardoyap@google.com>
diff --git a/.gitignore b/.gitignore
index 8f28dbc..18c8cff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,6 @@
# Node dependency download folders
node_modules
+
+# Ignore Python package build artifacts
+skia_export/skia_export.egg-info/
diff --git a/dashboard/skia_export/README.md b/dashboard/skia_export/README.md
index d0bf0a2..246cb5c 100644
--- a/dashboard/skia_export/README.md
+++ b/dashboard/skia_export/README.md
@@ -57,6 +57,23 @@
--repo_to_export=v8
```
+### Testing on a Specific Bot or Benchmark
+
+To narrow down your test to a specific bot or benchmark, you can use the following flags. This is useful for debugging data from a particular test configuration.
+
+For filtering by bot:
+
+```bash
+--bot_to_filter=linux-perf
+```
+
+For filtering by benchmark:
+```bash
+--benchmark_to_filter=speedometer
+```
+
+You can use these flags in combination with the time range and repository filters.
+
## Updating Templates
Once your changes have been tested and merged, you need to update the Dataflow template so that your changes are reflected in production. Run the following command:
@@ -90,7 +107,7 @@
--subnetwork=regions/us-central1/subnetworks/dashboard-batch \
--worker-machine-type=e2-standard-4 \
--project=chromeperf \
- --parameters=start_time=202304250000,end_time=202304260000,repo_to_export=fuchsia
+ --parameters=start_time=202304250000,end_time=202304260000,repo_to_export=fuchsia,bot_to_filter=linux-perf
```
-Make sure you set `start_time` and `end_time` to the desired range in `YYYYMMDDHHmm` format. You can also change the `repo_to_export` parameter.
+Make sure you set `start_time` and `end_time` to the desired range in `YYYYMMDDHHmm` format. You can also specify `repo_to_export`, `bot_to_filter`, and `benchmark_to_filter` as needed.
diff --git a/dashboard/skia_export/skia_export/export_options.py b/dashboard/skia_export/skia_export/export_options.py
index 61ffd4c..c07661b 100644
--- a/dashboard/skia_export/skia_export/export_options.py
+++ b/dashboard/skia_export/skia_export/export_options.py
@@ -20,6 +20,8 @@
start_time: ValueProvider for the start of the time range to fetch.
testing: ValueProvider to enable/disable testing mode (no GCS upload).
repo_to_export: Specific repository to export (e.g., chromium, webrtc).
+ bot_to_filter: Specific bot to filter for.
+ benchmark_to_filter: Specific benchmark to filter for.
"""
@classmethod
@@ -46,6 +48,18 @@
'GCS Buckets.'),
default='all',
)
+ parser.add_value_provider_argument(
+ '--bot_to_filter',
+ help=('Specify a bot name to filter for (e.g., "linux-perf"). '
+ 'If not specified, all bots are included.'),
+ default='all',
+ )
+ parser.add_value_provider_argument(
+ '--benchmark_to_filter',
+ help=('Specify a benchmark name to filter for (e.g., "speedometer"). '
+ 'If not specified, all benchmarks are included.'),
+ default='all',
+ )
def GetTimeRangeProvider(self):
"""Return an object with .Get() method that returns (start, end) tuple."""
diff --git a/dashboard/skia_export/skia_export/skia_pipeline.py b/dashboard/skia_export/skia_export/skia_pipeline.py
index 9453c34..1a9418f 100644
--- a/dashboard/skia_export/skia_export/skia_pipeline.py
+++ b/dashboard/skia_export/skia_export/skia_pipeline.py
@@ -47,7 +47,6 @@
# 'ChromiumPerfPGO', 'ChromiumPerf',
],
'public_bucket_name': 'chrome-perf-public',
- # 'public_bucket_name': None,
'internal_bucket_name': 'chrome-perf-non-public',
# 'internal_bucket_name': 'chrome-perf-non-public-secondary',
'ingest_folder': 'ingest',
@@ -171,30 +170,19 @@
"""
row_entities_read.inc()
try:
- # UNCOMMENT TO ADD FILTERS THAT ARE NOT MASTERS
- # # --- START NEW FILTER LOGIC ---
test_path_parts = entity.key.parent.name.split('/')
+ bot_to_filter = export_options.bot_to_filter.get()
+ benchmark_to_filter = export_options.benchmark_to_filter.get()
- # # 1. Check Bot/Benchmark filter
- # bot_name = test_path_parts[1] if len(test_path_parts) > 1 else None
- # benchmark_name = test_path_parts[2] if len(test_path_parts) > 2 else None
+ if bot_to_filter != 'all':
+ bot_name = test_path_parts[1] if len(test_path_parts) > 1 else None
+ if bot_name != bot_to_filter:
+ return [] # Fails Bot filter
- # if not (benchmark_name == 'startup.mobile'):
- # return [] # Fails Bot/Benchmark filter
-
- # # 2. Check Underscore filter
- # subtest_1 = test_path_parts[4] if len(test_path_parts) > 4 else None
- # subtest_2 = test_path_parts[5] if len(test_path_parts) > 5 else None
-
- # has_underscore = False
- # if subtest_1 and '_' in subtest_1:
- # has_underscore = True
- # if not has_underscore and subtest_2 and '_' in subtest_2:
- # has_underscore = True
-
- # if not has_underscore:
- # return [] # Fails Underscore filter
- # # --- END NEW FILTER LOGIC ---
+ if benchmark_to_filter != 'all':
+ benchmark_name = test_path_parts[2] if len(test_path_parts) > 2 else None
+ if benchmark_name != benchmark_to_filter:
+ return [] # Fails Benchmark filter
  # If we get here, the row passed both filters. Proceed.
d = {