use rand::{rngs::StdRng, Rng, SeedableRng};
use std::{env, error::Error, fs, io::Write, path::PathBuf};
pub fn random_bytes(n: usize) -> Vec<u8> {
let mut result = vec![];
let mut rng = seedable_rng();
for _ in 0..n {
result.push(rng.random_range(0..255));
}
result
}
pub fn seedable_rng() -> StdRng {
StdRng::seed_from_u64(42)
}
pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
let mut path_buf = env::current_dir().unwrap();
path_buf.push("target");
path_buf.push("debug");
path_buf.push("testdata");
fs::create_dir_all(&path_buf).unwrap();
path_buf.push(file_name);
let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
tmp_file.write_all(content).unwrap();
tmp_file.sync_all().unwrap();
let file = fs::OpenOptions::new()
.read(true)
.write(true)
.open(path_buf.as_path());
assert!(file.is_ok());
file.unwrap()
}
pub fn arrow_test_data() -> String {
match get_data_dir("ARROW_TEST_DATA", "../testing/data") {
Ok(pb) => pb.display().to_string(),
Err(err) => panic!("failed to get arrow data dir: {err}"),
}
}
pub fn parquet_test_data() -> String {
match get_data_dir("PARQUET_TEST_DATA", "../parquet-testing/data") {
Ok(pb) => pb.display().to_string(),
Err(err) => panic!("failed to get parquet data dir: {err}"),
}
}
fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn Error>> {
if let Ok(dir) = env::var(udf_env) {
let trimmed = dir.trim().to_string();
if !trimmed.is_empty() {
let pb = PathBuf::from(trimmed);
if pb.is_dir() {
return Ok(pb);
} else {
return Err(format!(
"the data dir `{}` defined by env {} not found",
pb.display(),
udf_env
)
.into());
}
}
}
let dir = env!("CARGO_MANIFEST_DIR");
let pb = PathBuf::from(dir).join(submodule_data);
if pb.is_dir() {
Ok(pb)
} else {
Err(format!(
"env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
HINT: try running `git submodule update --init`",
udf_env,
pb.display(),
).into())
}
}
#[derive(Debug, Clone)]
pub struct BadIterator<T> {
cur: usize,
limit: usize,
claimed: usize,
pub items: Vec<T>,
}
impl<T> BadIterator<T> {
pub fn new(limit: usize, claimed: usize, items: Vec<T>) -> Self {
assert!(!items.is_empty());
Self {
cur: 0,
limit,
claimed,
items,
}
}
}
impl<T: Clone> Iterator for BadIterator<T> {
type Item = T;
fn next(&mut self) -> Option<Self::Item> {
if self.cur < self.limit {
let next_item_idx = self.cur % self.items.len();
let next_item = self.items[next_item_idx].clone();
self.cur += 1;
Some(next_item)
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.claimed))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_data_dir() {
let udf_env = "get_data_dir";
let cwd = env::current_dir().unwrap();
let existing_pb = cwd.join("..");
let existing = existing_pb.display().to_string();
let existing_str = existing.as_str();
let non_existing = cwd.join("non-existing-dir").display().to_string();
let non_existing_str = non_existing.as_str();
env::set_var(udf_env, non_existing_str);
let res = get_data_dir(udf_env, existing_str);
assert!(res.is_err());
env::set_var(udf_env, "");
let res = get_data_dir(udf_env, existing_str);
assert!(res.is_ok());
assert_eq!(res.unwrap(), existing_pb);
env::set_var(udf_env, " ");
let res = get_data_dir(udf_env, existing_str);
assert!(res.is_ok());
assert_eq!(res.unwrap(), existing_pb);
env::set_var(udf_env, existing_str);
let res = get_data_dir(udf_env, existing_str);
assert!(res.is_ok());
assert_eq!(res.unwrap(), existing_pb);
env::remove_var(udf_env);
let res = get_data_dir(udf_env, non_existing_str);
assert!(res.is_err());
let res = get_data_dir(udf_env, existing_str);
assert!(res.is_ok());
assert_eq!(res.unwrap(), existing_pb);
}
#[test]
fn test_happy() {
let res = arrow_test_data();
assert!(PathBuf::from(res).is_dir());
let res = parquet_test_data();
assert!(PathBuf::from(res).is_dir());
}
}