use crate::{
array::*,
buffer::Buffer,
datatypes::DataType,
error::{ArrowError, Result},
};
/// Builds a [`PrimitiveArray`] by applying `op` to the distance between every
/// pair of consecutive offsets of `array`.
///
/// The result has one value per adjacent offset pair and reuses a clone of the
/// input's validity, so its null slots line up with those of `array`. The
/// logical type of the result is `Int64` when `O::is_large()` reports true and
/// `Int32` otherwise.
fn unary_offsets_string<O, F>(array: &Utf8Array<O>, op: F) -> PrimitiveArray<O>
where
    O: Offset,
    F: Fn(O) -> O,
{
    // Choose the logical type that corresponds to the offset width.
    let data_type = if O::is_large() {
        DataType::Int64
    } else {
        DataType::Int32
    };

    // Each window holds `[start, end]`; `end - start` is the span handed to `op`.
    let spans = array
        .offsets()
        .windows(2)
        .map(|bounds| op(bounds[1] - bounds[0]));

    PrimitiveArray::<O>::from_data(
        data_type,
        Buffer::from_trusted_len_iter(spans),
        array.validity().cloned(),
    )
}
/// Computes, for every slot of a string array, the length of that slot as
/// given by the distance between its consecutive offsets.
///
/// `DataType::Utf8` inputs produce an `Int32` array and `DataType::LargeUtf8`
/// inputs produce an `Int64` array; the input's validity is carried over.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when the array's data type is
/// neither `Utf8` nor `LargeUtf8`.
///
/// # Panics
///
/// Panics if the concrete array type does not match the data type it reports
/// (the downcast is unwrapped).
pub fn length(array: &dyn Array) -> Result<Box<dyn Array>> {
    let lengths: Box<dyn Array> = match array.data_type() {
        DataType::Utf8 => {
            let utf8 = array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap();
            // The identity closure keeps the raw offset differences.
            Box::new(unary_offsets_string::<i32, _>(utf8, |len| len))
        }
        DataType::LargeUtf8 => {
            let large_utf8 = array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
            Box::new(unary_offsets_string::<i64, _>(large_utf8, |len| len))
        }
        unsupported => {
            return Err(ArrowError::InvalidArgumentError(format!(
                "length not supported for {:?}",
                unsupported
            )));
        }
    };
    Ok(lengths)
}
/// Reports whether [`length`] accepts arrays of `data_type`.
///
/// Only `DataType::Utf8` and `DataType::LargeUtf8` are accepted, mirroring the
/// arms handled by `length`.
pub fn can_length(data_type: &DataType) -> bool {
    match data_type {
        DataType::Utf8 | DataType::LargeUtf8 => true,
        _ => false,
    }
}