use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType};
use super::{
display_fmt,
specification::{check_offsets_and_utf8, check_offsets_minimal},
Array, GenericBinaryArray, Offset,
};
mod ffi;
mod from;
mod iterator;
mod mutable;
pub use iterator::*;
pub use mutable::*;
/// An array of UTF-8 strings, generic over the offset type `O`.
///
/// Slot `i` is the byte range `values[offsets[i]..offsets[i + 1]]`, so `offsets`
/// always holds one more entry than the array has slots.
#[derive(Debug, Clone)]
pub struct Utf8Array<O: Offset> {
// Logical type of the array; the constructors reject anything whose physical
// type differs from that of `Self::default_data_type()`.
data_type: DataType,
// `len + 1` positions into `values` delimiting each string.
offsets: Buffer<O>,
// Concatenated bytes of all strings.
values: Buffer<u8>,
// Optional bitmap with one bit per slot; `None` means no bitmap is attached.
// NOTE(review): `new_null` builds an all-null array from a zeroed bitmap, so an
// unset bit presumably marks a null slot -- confirm against `Bitmap`.
validity: Option<Bitmap>,
// Zero on construction; `slice_unchecked` adds the slice start to it.
// NOTE(review): not read in this part of the file -- presumably consumed by a
// sibling module (e.g. `ffi`); confirm.
offset: usize,
}
impl<O: Offset> Utf8Array<O> {
    /// Returns a new [`Utf8Array`] with zero slots.
    ///
    /// # Panics
    /// Panics if the physical type of `data_type` differs from the physical type
    /// of [`Self::default_data_type`].
    #[inline]
    pub fn new_empty(data_type: DataType) -> Self {
        // SAFETY: a single zero offset over an empty values buffer describes zero
        // strings, so there are no bytes whose UTF-8 validity could be violated.
        unsafe {
            Self::from_data_unchecked(data_type, Buffer::from(&[O::zero()]), Buffer::new(), None)
        }
    }

    /// Returns a new [`Utf8Array`] with `length` slots, built from `length + 1`
    /// zeroed offsets, an empty values buffer and a zeroed validity bitmap.
    ///
    /// # Panics
    /// Panics under the same conditions as [`Self::from_data`].
    #[inline]
    pub fn new_null(data_type: DataType, length: usize) -> Self {
        Self::from_data(
            data_type,
            Buffer::new_zeroed(length + 1),
            Buffer::new(),
            Some(Bitmap::new_zeroed(length)),
        )
    }

    /// Creates a new [`Utf8Array`] from its parts, validating them first.
    ///
    /// `offsets` and `values` are passed to `check_offsets_and_utf8`
    /// (presumably panicking on inconsistent offsets or invalid UTF-8 -- see that
    /// function for the exact contract).
    ///
    /// # Panics
    /// * if `validity` is `Some` and its length is not `offsets.len() - 1`;
    /// * if the physical type of `data_type` differs from the physical type of
    ///   [`Self::default_data_type`].
    pub fn from_data(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        check_offsets_and_utf8(&offsets, &values);
        if let Some(ref validity) = validity {
            assert_eq!(offsets.len() - 1, validity.len());
        }

        if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
            panic!("Utf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
        }

        Self {
            data_type,
            offsets,
            values,
            validity,
            offset: 0,
        }
    }

    /// Returns the [`DataType`] matching the offset type `O`:
    /// `LargeUtf8` when `O::is_large()`, `Utf8` otherwise.
    pub fn default_data_type() -> DataType {
        if O::is_large() {
            DataType::LargeUtf8
        } else {
            DataType::Utf8
        }
    }

    /// Creates a new [`Utf8Array`] from its parts without validating that
    /// `values` is UTF-8; only `check_offsets_minimal` is run on the offsets.
    ///
    /// # Safety
    /// The accessors of this type read the bytes between consecutive offsets with
    /// `std::str::from_utf8_unchecked`, so the caller must guarantee that every
    /// such range of `values` is valid UTF-8 and that the offsets are in bounds.
    ///
    /// # Panics
    /// * if `validity` is `Some` and its length is not `offsets.len() - 1`;
    /// * if the physical type of `data_type` differs from the physical type of
    ///   [`Self::default_data_type`].
    pub unsafe fn from_data_unchecked(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        check_offsets_minimal(&offsets, values.len());
        if let Some(ref validity) = validity {
            assert_eq!(offsets.len() - 1, validity.len());
        }

        if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
            panic!("Utf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
        }

        Self {
            data_type,
            offsets,
            values,
            validity,
            offset: 0,
        }
    }

    /// Returns a slice of this array covering slots `offset..offset + length`.
    ///
    /// # Panics
    /// Panics if `offset + length` overflows `usize` or exceeds `self.len()`.
    pub fn slice(&self, offset: usize, length: usize) -> Self {
        // `offset + length` must not be computed with plain `+`: in release builds
        // the sum wraps, which would let out-of-range arguments pass the check and
        // reach `slice_unchecked`. Overflow is treated as out of bounds instead.
        assert!(
            matches!(offset.checked_add(length), Some(end) if end <= self.len()),
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: `offset + length <= self.len()` was asserted just above.
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Returns a slice of this array covering slots `offset..offset + length`
    /// without checking the range.
    ///
    /// The values buffer is cloned as is; only the offsets (`length + 1` entries)
    /// and the validity bitmap are narrowed.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
        let validity = self
            .validity
            .clone()
            .map(|x| x.slice_unchecked(offset, length));
        // One extra offset is kept so the last slot still has an end position.
        let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
        Self {
            data_type: self.data_type.clone(),
            offsets,
            values: self.values.clone(),
            validity,
            offset: self.offset + offset,
        }
    }

    /// Returns a clone of this array with its validity replaced by `validity`.
    ///
    /// # Panics
    /// Panics if `validity` is `Some` and its length is not exactly `self.len()`
    /// (the check is an equality test, regardless of the message wording).
    pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
            panic!("validity should be as least as large as the array")
        }
        let mut arr = self.clone();
        arr.validity = validity;
        arr
    }
}
impl<O: Offset> Utf8Array<O> {
    /// Returns the string stored in slot `i` without any bounds checks.
    ///
    /// # Safety
    /// Both `i` and `i + 1` must be valid indices into the offsets buffer, i.e.
    /// `i` must be smaller than the number of slots.
    pub unsafe fn value_unchecked(&self, i: usize) -> &str {
        let begin = self.offsets.get_unchecked(i).to_usize();
        let finish = self.offsets.get_unchecked(i + 1).to_usize();
        // The byte range is trusted to lie inside `values` and to be valid UTF-8.
        std::str::from_utf8_unchecked(self.values.get_unchecked(begin..finish))
    }

    /// Returns the string stored in slot `i`.
    ///
    /// # Panics
    /// Panics if `i` or `i + 1` is out of bounds of the offsets buffer.
    pub fn value(&self, i: usize) -> &str {
        // Indexing the offsets is bounds-checked; everything below is not.
        let begin = self.offsets[i].to_usize();
        let finish = self.offsets[i + 1].to_usize();
        // SAFETY: relies on the constructors having checked the offsets against
        // `values`, and on the bytes being UTF-8 (validated by `from_data`, or
        // promised by the caller of `from_data_unchecked`).
        unsafe {
            let bytes = self.values.get_unchecked(begin..finish);
            std::str::from_utf8_unchecked(bytes)
        }
    }

    /// Returns the validity bitmap, if one is attached.
    #[inline]
    pub fn validity(&self) -> Option<&Bitmap> {
        let bitmap = &self.validity;
        bitmap.as_ref()
    }

    /// Returns the offsets buffer delimiting each string inside the values.
    #[inline]
    pub fn offsets(&self) -> &Buffer<O> {
        let positions = &self.offsets;
        positions
    }

    /// Returns the buffer holding the concatenated bytes of all strings.
    #[inline]
    pub fn values(&self) -> &Buffer<u8> {
        let bytes = &self.values;
        bytes
    }
}
impl<O: Offset> Array for Utf8Array<O> {
    /// Exposes the array as [`std::any::Any`].
    #[inline]
    fn as_any(&self) -> &dyn std::any::Any {
        self as &dyn std::any::Any
    }

    /// Number of slots: one less than the number of stored offsets.
    #[inline]
    fn len(&self) -> usize {
        let offset_count = self.offsets.len();
        offset_count - 1
    }

    /// Logical [`DataType`] this array was created with.
    #[inline]
    fn data_type(&self) -> &DataType {
        let logical = &self.data_type;
        logical
    }

    /// Validity bitmap, if one is attached.
    fn validity(&self) -> Option<&Bitmap> {
        let bitmap = &self.validity;
        bitmap.as_ref()
    }

    /// Boxed version of the inherent, bounds-asserting `slice`.
    fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let sliced: Self = self.slice(offset, length);
        Box::new(sliced)
    }

    /// Boxed version of the inherent `slice_unchecked`.
    ///
    /// # Safety
    /// Same contract as the inherent `slice_unchecked`, to which this forwards
    /// `offset` and `length` unchanged.
    unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let sliced: Self = self.slice_unchecked(offset, length);
        Box::new(sliced)
    }

    /// Boxed version of the inherent `with_validity`.
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        let replaced: Self = self.with_validity(validity);
        Box::new(replaced)
    }
}
impl<O: Offset> std::fmt::Display for Utf8Array<O> {
    /// Formats the array by handing its iterator and the textual form of its
    /// data type to `display_fmt`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The data type's `Display` output is passed as the second argument.
        let header = format!("{}", self.data_type());
        let items = self.iter();
        display_fmt(items, &header, f, false)
    }
}
// NOTE(review): the trait is `unsafe`, so it presumably requires the returned
// offsets and values to be mutually consistent -- confirm against its definition.
unsafe impl<O: Offset> GenericBinaryArray<O> for Utf8Array<O> {
    /// Concatenated string bytes, viewed as a plain byte slice.
    #[inline]
    fn values(&self) -> &[u8] {
        // `&Buffer<u8>` coerces to `&[u8]` at the return site.
        &self.values
    }

    /// Offsets delimiting each string, viewed as a plain slice.
    #[inline]
    fn offsets(&self) -> &[O] {
        // `&Buffer<O>` coerces to `&[O]` at the return site.
        &self.offsets
    }
}