[go: up one dir, main page]

File: cache_datasets.py

package info (click to toggle)
seaborn 0.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,148 kB
  • sloc: python: 36,560; makefile: 183; javascript: 45; sh: 15
file content (27 lines) | stat: -rw-r--r-- 643 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""
Cache test datasets before running tests / building docs.

Avoids race conditions that would arise from parallelization.
"""
import pathlib
import re

from seaborn import load_dataset

# Scan every Python file and notebook under the current directory for
# ``load_dataset("<name>")`` calls, then load each referenced dataset once
# (serially) so that it is already cached before parallel jobs start.

# Python source files default to UTF-8 and notebooks are UTF-8 JSON; decode
# explicitly so the scan does not depend on the platform's locale encoding.
source_encoding = "utf-8"

# Matches load_dataset("name") / load_dataset('name') in plain Python source.
py_pattern = re.compile(r"load_dataset\(['\"](\w+)['\"]")

# Notebooks store code inside JSON strings, so the quote characters around the
# dataset name are expected to be preceded by a literal backslash.
# NOTE(review): this pattern requires the backslash, so single-quoted names
# (which JSON serializers normally leave unescaped) are not picked up here.
ipynb_pattern = re.compile(r"load_dataset\(\\['\"](\w+)\\['\"]")

path = pathlib.Path(".")
py_files = path.rglob("*.py")
ipynb_files = path.rglob("*.ipynb")

datasets = []

for fname in py_files:
    with fname.open(encoding=source_encoding) as fid:
        datasets += py_pattern.findall(fid.read())

for p in ipynb_files:
    with p.open(encoding=source_encoding) as fid:
        datasets += ipynb_pattern.findall(fid.read())

# De-duplicate and sort so each dataset is fetched exactly once and the
# progress output is in a stable order.
for name in sorted(set(datasets)):
    print(f"Caching {name}")
    load_dataset(name)