#![deny(missing_docs)]
use std::any::Any;
use crate::error::Result;
use crate::types::{days_ms, months_days_ns};
use crate::{
bitmap::{Bitmap, MutableBitmap},
datatypes::DataType,
};
pub(self) mod physical_binary;
/// A dynamically-typed, immutable array that can be sent and shared across threads.
///
/// A concrete array type is recovered from a `dyn Array` by calling [`Array::as_any`]
/// and downcasting the result.
pub trait Array: Send + Sync {
    /// Returns `self` as [`Any`], the entry point for downcasting to a concrete array type.
    fn as_any(&self) -> &dyn Any;

    /// Returns the number of slots in the array.
    fn len(&self) -> usize;

    /// Returns `true` when [`Array::len`] is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the [`DataType`] of the array.
    fn data_type(&self) -> &DataType;

    /// Returns the validity [`Bitmap`], if the array carries one.
    ///
    /// The default methods of this trait read an unset bit as "slot is null" and a
    /// missing bitmap (`None`) as "no slot is null".
    fn validity(&self) -> Option<&Bitmap>;

    /// Returns the number of null slots.
    ///
    /// An array whose data type equals [`DataType::Null`] reports all of its slots as
    /// null; otherwise the count is taken from the validity bitmap, or is `0` when there
    /// is no bitmap.
    #[inline]
    fn null_count(&self) -> usize {
        if *self.data_type() == DataType::Null {
            self.len()
        } else {
            self.validity().map_or(0, |bitmap| bitmap.null_count())
        }
    }

    /// Returns whether slot `i` is null, i.e. whether its validity bit is unset.
    ///
    /// Always `false` when the array has no validity bitmap. Out-of-range handling is
    /// whatever `Bitmap::get_bit` does for `i`.
    #[inline]
    fn is_null(&self, i: usize) -> bool {
        self.validity().map_or(false, |bitmap| !bitmap.get_bit(i))
    }

    /// Returns whether slot `i` is valid; the exact negation of [`Array::is_null`].
    #[inline]
    fn is_valid(&self, i: usize) -> bool {
        !self.is_null(i)
    }

    /// Returns a boxed array covering `length` slots starting at `offset`.
    ///
    /// NOTE(review): implementors presumably panic when `offset + length > self.len()`
    /// — confirm against the concrete implementations.
    fn slice(&self, offset: usize, length: usize) -> Box<dyn Array>;

    /// Unchecked variant of [`Array::slice`].
    ///
    /// # Safety
    /// NOTE(review): the caller presumably must guarantee
    /// `offset + length <= self.len()` — confirm against the concrete implementations.
    unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array>;

    /// Returns a boxed array built from `self` with `validity` as its validity bitmap.
    ///
    /// NOTE(review): implementors presumably require the bitmap length to match
    /// `self.len()` — confirm.
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
}
/// A dynamically-typed, mutable array that can be converted into an immutable [`Array`].
pub trait MutableArray: std::fmt::Debug + Send + Sync {
    /// Returns the [`DataType`] of the array.
    fn data_type(&self) -> &DataType;

    /// Returns the number of slots currently in the array.
    fn len(&self) -> usize;

    /// Returns `true` when [`MutableArray::len`] is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the validity [`MutableBitmap`], if the array carries one.
    fn validity(&self) -> Option<&MutableBitmap>;

    /// Converts the current contents into a boxed immutable [`Array`].
    ///
    /// NOTE(review): takes `&mut self`, so implementors presumably move their buffers
    /// out and leave `self` empty — confirm.
    fn as_box(&mut self) -> Box<dyn Array>;

    /// Same as [`MutableArray::as_box`], but returns the array behind an [`Arc`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        Arc::from(self.as_box())
    }

    /// Returns `self` as [`Any`], for downcasting to the concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Returns `self` as mutable [`Any`], for downcasting to the concrete type.
    fn as_mut_any(&mut self) -> &mut dyn Any;

    /// Appends a single null slot (contract inferred from the name; implementors
    /// define the details).
    fn push_null(&mut self);

    /// Returns whether slot `index` is valid according to the validity bitmap.
    ///
    /// Always `true` when the array has no validity bitmap.
    #[inline]
    fn is_valid(&self, index: usize) -> bool {
        self.validity().map_or(true, |bitmap| bitmap.get(index))
    }

    /// Asks the array to release excess capacity (contract inferred from the name;
    /// implementors define the details).
    fn shrink_to_fit(&mut self);
}
// Downcasts `$array` (anything exposing `as_any()`) to `&$ty` and applies `$f` to it.
// Panics (via `unwrap`) when the value behind `as_any()` is not a `$ty`.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete: &$ty = $array.as_any().downcast_ref().unwrap();
        ($f)(concrete)
    }};
}
// Downcasts `$array` to `$ty` and calls that concrete type's `fmt` with the formatter `$f`.
// The closure is bound as `mut` because it captures the formatter, which is used mutably.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
// Dispatches on an `IntegerType` value and expands the caller's body once per variant,
// with the caller-chosen metavariable bound to the matching Rust integer type.
//
// Usage: `match_integer_type!(key_type, |$T| { ...body using $T... })`.
//
// How it works: the matcher captures the literal `$` token as `$_` and the caller's
// identifier (e.g. `T`) as `$T`. It then defines an inner macro `__with_ty__` whose own
// matcher is spelled `$_ $T:ident`, i.e. a metavariable named after the caller's
// identifier, and whose transcriber is the caller's body. Each match arm invokes the
// inner macro with a concrete type name. Because the inner fragment is `ident`, only
// single-identifier type names can be substituted.
macro_rules! match_integer_type {(
$key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
// Inner macro: substitutes the given type identifier into the caller's body.
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::IntegerType::*;
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
UInt8 => __with_ty__! { u8 },
UInt16 => __with_ty__! { u16 },
UInt32 => __with_ty__! { u32 },
UInt64 => __with_ty__! { u64 },
}
})}
// Dispatches on a `PrimitiveType` value and expands the caller's body once per variant,
// with the caller-chosen metavariable bound to the matching native Rust type.
//
// Usage: `with_match_primitive_type!(primitive, |$T| { ...body using $T... })`.
//
// Uses the same nested-macro technique as `match_integer_type!`: the literal `$` is
// captured as `$_`, the caller's identifier as `$T`, and an inner macro `__with_ty__`
// re-binds that identifier to a concrete type name in each arm. `days_ms` and
// `months_days_ns` are imported inside the expansion so that those arms resolve
// regardless of what the call site has in scope.
macro_rules! with_match_primitive_type {(
$key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
// Inner macro: substitutes the given type identifier into the caller's body.
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
UInt16 => __with_ty__! { u16 },
UInt32 => __with_ty__! { u32 },
UInt64 => __with_ty__! { u64 },
Float32 => __with_ty__! { f32 },
Float64 => __with_ty__! { f64 },
}
})}
/// Formats a dynamically-typed [`Array`] by downcasting it to the concrete array type
/// selected by its physical type and delegating to that type's own `fmt`.
impl std::fmt::Debug for dyn Array + '_ {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use crate::datatypes::PhysicalType::*;
        match self.data_type().to_physical_type() {
            Null => fmt_dyn!(self, NullArray, f),
            Boolean => fmt_dyn!(self, BooleanArray, f),
            Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
                fmt_dyn!(self, PrimitiveArray<$T>, f)
            }),
            Binary => fmt_dyn!(self, BinaryArray<i32>, f),
            LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
            FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
            Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
            LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
            List => fmt_dyn!(self, ListArray::<i32>, f),
            LargeList => fmt_dyn!(self, ListArray::<i64>, f),
            FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
            Struct => fmt_dyn!(self, StructArray, f),
            Union => fmt_dyn!(self, UnionArray, f),
            Dictionary(key_type) => {
                match_integer_type!(key_type, |$T| {
                    fmt_dyn!(self, DictionaryArray::<$T>, f)
                })
            }
            // This arm used to be `todo!()`, so debug-printing a map array (or any value
            // containing one) panicked. Delegate to the concrete type like every other arm.
            // NOTE(review): relies on `MapArray` providing `fmt` the same way the other
            // array types do — confirm it implements `Debug`.
            Map => fmt_dyn!(self, MapArray, f),
        }
    }
}
/// Creates a boxed [`Array`] for `data_type` by calling `new_empty` on the concrete
/// array type that corresponds to the data type's physical type.
///
/// `data_type` is moved into the concrete array.
pub fn new_empty_array(data_type: DataType) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;
    // Resolve the physical type first; `data_type` itself is moved by the chosen arm.
    let physical_type = data_type.to_physical_type();
    match physical_type {
        Null => Box::new(NullArray::new_empty(data_type)),
        Boolean => Box::new(BooleanArray::new_empty(data_type)),
        Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
            Box::new(PrimitiveArray::<$T>::new_empty(data_type))
        }),
        Dictionary(key_type) => match_integer_type!(key_type, |$T| {
            Box::new(DictionaryArray::<$T>::new_empty(data_type))
        }),
        // Binary and string layouts.
        Binary => Box::new(BinaryArray::<i32>::new_empty(data_type)),
        LargeBinary => Box::new(BinaryArray::<i64>::new_empty(data_type)),
        FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(data_type)),
        Utf8 => Box::new(Utf8Array::<i32>::new_empty(data_type)),
        LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(data_type)),
        // Nested layouts.
        List => Box::new(ListArray::<i32>::new_empty(data_type)),
        LargeList => Box::new(ListArray::<i64>::new_empty(data_type)),
        FixedSizeList => Box::new(FixedSizeListArray::new_empty(data_type)),
        Struct => Box::new(StructArray::new_empty(data_type)),
        Union => Box::new(UnionArray::new_empty(data_type)),
        Map => Box::new(MapArray::new_empty(data_type)),
    }
}
/// Creates a boxed [`Array`] for `data_type` by calling `new_null(data_type, length)` on
/// the concrete array type that corresponds to the data type's physical type.
///
/// `data_type` is moved into the concrete array; `length` is forwarded unchanged.
pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;
    // Resolve the physical type first; `data_type` itself is moved by the chosen arm.
    let physical_type = data_type.to_physical_type();
    match physical_type {
        Null => Box::new(NullArray::new_null(data_type, length)),
        Boolean => Box::new(BooleanArray::new_null(data_type, length)),
        Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
            Box::new(PrimitiveArray::<$T>::new_null(data_type, length))
        }),
        Dictionary(key_type) => match_integer_type!(key_type, |$T| {
            Box::new(DictionaryArray::<$T>::new_null(data_type, length))
        }),
        // Binary and string layouts.
        Binary => Box::new(BinaryArray::<i32>::new_null(data_type, length)),
        LargeBinary => Box::new(BinaryArray::<i64>::new_null(data_type, length)),
        FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(data_type, length)),
        Utf8 => Box::new(Utf8Array::<i32>::new_null(data_type, length)),
        LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(data_type, length)),
        // Nested layouts.
        List => Box::new(ListArray::<i32>::new_null(data_type, length)),
        LargeList => Box::new(ListArray::<i64>::new_null(data_type, length)),
        FixedSizeList => Box::new(FixedSizeListArray::new_null(data_type, length)),
        Struct => Box::new(StructArray::new_null(data_type, length)),
        Union => Box::new(UnionArray::new_null(data_type, length)),
        Map => Box::new(MapArray::new_null(data_type, length)),
    }
}
// Downcasts `$array` to `$ty` (through `general_dyn!`) and returns a `Box` holding a
// clone of the concrete value.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let boxed_clone = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, boxed_clone)
    }};
}
/// Clones a dynamically-typed [`Array`] into a new box.
///
/// The array is downcast to the concrete type selected by its physical type and that
/// concrete value's `clone` is boxed. The downcast unwraps, so it panics if the array's
/// concrete type does not match its physical type.
pub fn clone(array: &dyn Array) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;
    let physical_type = array.data_type().to_physical_type();
    match physical_type {
        Null => clone_dyn!(array, NullArray),
        Boolean => clone_dyn!(array, BooleanArray),
        Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
            clone_dyn!(array, PrimitiveArray<$T>)
        }),
        Dictionary(key_type) => match_integer_type!(key_type, |$T| {
            clone_dyn!(array, DictionaryArray::<$T>)
        }),
        // Binary and string layouts.
        Binary => clone_dyn!(array, BinaryArray<i32>),
        LargeBinary => clone_dyn!(array, BinaryArray<i64>),
        FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
        Utf8 => clone_dyn!(array, Utf8Array::<i32>),
        LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
        // Nested layouts.
        List => clone_dyn!(array, ListArray::<i32>),
        LargeList => clone_dyn!(array, ListArray::<i64>),
        FixedSizeList => clone_dyn!(array, FixedSizeListArray),
        Struct => clone_dyn!(array, StructArray),
        Union => clone_dyn!(array, UnionArray),
        Map => clone_dyn!(array, MapArray),
    }
}
// Lets a `dyn Array` be passed wherever `AsRef<dyn Array>` is expected; the conversion
// is the identity and returns `self` unchanged.
impl<'a> AsRef<(dyn Array + 'a)> for dyn Array {
fn as_ref(&self) -> &(dyn Array + 'a) {
self
}
}
mod binary;
mod boolean;
mod dictionary;
mod display;
mod fixed_size_binary;
mod fixed_size_list;
mod list;
mod map;
mod null;
mod primitive;
mod specification;
mod struct_;
mod union;
mod utf8;
mod equal;
mod ffi;
pub mod growable;
pub mod ord;
pub use display::get_display;
pub use equal::equal;
pub use crate::types::Offset;
pub use binary::{BinaryArray, BinaryValueIter, MutableBinaryArray};
pub use boolean::{BooleanArray, MutableBooleanArray};
pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
pub use fixed_size_binary::{FixedSizeBinaryArray, MutableFixedSizeBinaryArray};
pub use fixed_size_list::{FixedSizeListArray, MutableFixedSizeListArray};
pub use list::{ListArray, MutableListArray};
pub use map::MapArray;
pub use null::NullArray;
pub use primitive::*;
pub use struct_::StructArray;
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
pub(crate) use self::ffi::offset_buffers_children_dictionary;
pub(crate) use self::ffi::FromFfi;
pub(crate) use self::ffi::ToFfi;
/// Fallible bulk insertion: a container that can be extended from an iterator of `A`,
/// reporting failure through the crate's [`Result`].
pub trait TryExtend<A> {
/// Consumes `iter` and tries to append its items to `self`.
///
/// NOTE(review): whether items appended before an error remain in `self` is up to the
/// implementor — confirm per implementation.
fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> Result<()>;
}
/// Fallible single-item insertion: a container that can have one `A` pushed into it,
/// reporting failure through the crate's [`Result`].
pub trait TryPush<A> {
/// Tries to append `item` to `self`.
fn try_push(&mut self, item: A) -> Result<()>;
}
/// Renders every item of `iter` to a `String`: `Some(value)` through `Display`, `None`
/// as the literal text `None`.
fn display_helper<T: std::fmt::Display, I: IntoIterator<Item = Option<T>>>(iter: I) -> Vec<String> {
    iter.into_iter()
        .map(|item| item.map_or_else(|| "None".to_string(), |value| value.to_string()))
        .collect()
}
/// Renders every item of `iter` to a `String`: `Some(value)` through `Debug` (`{:?}`),
/// `None` as the literal text `None`.
fn debug_helper<T: std::fmt::Debug, I: IntoIterator<Item = Option<T>>>(iter: I) -> Vec<String> {
    iter.into_iter()
        .map(|item| item.map_or_else(|| "None".to_string(), |value| format!("{:?}", value)))
        .collect()
}
/// Writes `head` followed by the bracketed, comma-separated `Debug` rendering of `iter`.
///
/// With `new_lines` set, each item goes on its own line between `[` and `]`; otherwise
/// all items share one line, separated by `", "`. `None` items are written as `None`.
fn debug_fmt<T: std::fmt::Debug, I: IntoIterator<Item = Option<T>>>(
    iter: I,
    head: &str,
    f: &mut std::fmt::Formatter<'_>,
    new_lines: bool,
) -> std::fmt::Result {
    let items = debug_helper(iter);
    if !new_lines {
        return write!(f, "{}[{}]", head, items.join(", "));
    }
    write!(f, "{}[\n{}\n]", head, items.join(",\n"))
}
/// Writes `head` followed by the bracketed, comma-separated `Display` rendering of `iter`.
///
/// With `new_lines` set, each item goes on its own line between `[` and `]`; otherwise
/// all items share one line, separated by `", "`. `None` items are written as `None`.
fn display_fmt<T: std::fmt::Display, I: IntoIterator<Item = Option<T>>>(
    iter: I,
    head: &str,
    f: &mut std::fmt::Formatter<'_>,
    new_lines: bool,
) -> std::fmt::Result {
    let items = display_helper(iter);
    if !new_lines {
        return write!(f, "{}[{}]", head, items.join(", "));
    }
    write!(f, "{}[\n{}\n]", head, items.join(",\n"))
}
/// An [`Array`] whose slots can each be returned as a boxed [`Array`]
/// (presumably implemented by the list-like array types — confirm against implementors).
pub trait IterableListArray: Array {
/// Returns the value at slot `i` as a boxed array, without bounds checking.
///
/// # Safety
/// NOTE(review): the caller presumably must guarantee `i < self.len()` — confirm
/// against the concrete implementations.
unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array>;
}
/// An [`Array`] that exposes a byte buffer together with a buffer of offsets of type `O`
/// (presumably implemented by the binary and UTF-8 array types — confirm).
///
/// # Safety
/// This is an `unsafe trait`: implementors must uphold invariants that callers rely on.
/// NOTE(review): presumably `offsets` is non-empty, monotonically non-decreasing, and
/// every offset lies within `values` — confirm and record the exact contract here.
pub unsafe trait GenericBinaryArray<O: Offset>: Array {
/// Returns the underlying bytes.
fn values(&self) -> &[u8];
/// Returns the offsets into [`GenericBinaryArray::values`].
fn offsets(&self) -> &[O];
}
use std::sync::Arc;
/// A shared, reference-counted handle to a dynamically-typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;