use std::collections::BTreeMap;
use crate::error::{ArrowError, Result};
use super::DataType;
/// A named, typed column description: a name, a [`DataType`], a nullability
/// flag, dictionary attributes and optional key-value metadata.
///
/// `PartialEq` and `Hash` are implemented by hand in this file and both skip
/// `dict_id`, so two fields that differ only in `dict_id` compare equal and
/// hash identically.
#[derive(Debug, Clone, Eq)]
pub struct Field {
/// Name of the field.
pub name: String,
/// Data type of the field.
pub data_type: DataType,
/// Whether the field is nullable.
pub nullable: bool,
/// Dictionary id. `Field::dict_id` only reports it when `data_type` is
/// `DataType::Dictionary`; it is ignored by equality and hashing.
pub dict_id: i64,
/// Dictionary ordering flag. `Field::dict_is_ordered` only reports it when
/// `data_type` is `DataType::Dictionary`.
pub dict_is_ordered: bool,
/// Optional key-value metadata attached to the field.
pub metadata: Option<BTreeMap<String, String>>,
}
impl std::hash::Hash for Field {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name.hash(state);
self.data_type.hash(state);
self.nullable.hash(state);
self.dict_is_ordered.hash(state);
self.metadata.hash(state);
}
}
impl PartialEq for Field {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.data_type == other.data_type
&& self.nullable == other.nullable
&& self.dict_is_ordered == other.dict_is_ordered
&& self.metadata == other.metadata
}
}
impl Field {
pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
Field {
name: name.to_string(),
data_type,
nullable,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
}
}
pub fn new_dict(
name: &str,
data_type: DataType,
nullable: bool,
dict_id: i64,
dict_is_ordered: bool,
) -> Self {
Field {
name: name.to_string(),
data_type,
nullable,
dict_id,
dict_is_ordered,
metadata: None,
}
}
#[inline]
pub fn with_metadata(self, metadata: BTreeMap<String, String>) -> Self {
Self {
name: self.name,
data_type: self.data_type,
nullable: self.nullable,
dict_id: self.dict_id,
dict_is_ordered: self.dict_is_ordered,
metadata: Some(metadata),
}
}
#[inline]
pub fn set_metadata(&mut self, metadata: Option<BTreeMap<String, String>>) {
self.metadata = None;
if let Some(v) = metadata {
if !v.is_empty() {
self.metadata = Some(v);
}
}
}
#[inline]
pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
&self.metadata
}
#[inline]
pub const fn name(&self) -> &String {
&self.name
}
#[inline]
pub const fn data_type(&self) -> &DataType {
&self.data_type
}
#[inline]
pub const fn is_nullable(&self) -> bool {
self.nullable
}
#[inline]
pub const fn dict_id(&self) -> Option<i64> {
match self.data_type {
DataType::Dictionary(_, _) => Some(self.dict_id),
_ => None,
}
}
#[inline]
pub const fn dict_is_ordered(&self) -> Option<bool> {
match self.data_type {
DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
_ => None,
}
}
pub fn try_merge(&mut self, from: &Field) -> Result<()> {
match (self.metadata(), from.metadata()) {
(Some(self_metadata), Some(from_metadata)) => {
let mut merged = self_metadata.clone();
for (key, from_value) in from_metadata {
if let Some(self_value) = self_metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::InvalidArgumentError(format!(
"Fail to merge field due to conflicting metadata data value for key {}", key),
));
}
} else {
merged.insert(key.clone(), from_value.clone());
}
}
self.set_metadata(Some(merged));
}
(None, Some(from_metadata)) => {
self.set_metadata(Some(from_metadata.clone()));
}
_ => {}
}
if from.dict_id != self.dict_id {
return Err(ArrowError::InvalidArgumentError(
"Fail to merge schema Field due to conflicting dict_id".to_string(),
));
}
if from.dict_is_ordered != self.dict_is_ordered {
return Err(ArrowError::InvalidArgumentError(
"Fail to merge schema Field due to conflicting dict_is_ordered".to_string(),
));
}
match &mut self.data_type {
DataType::Struct(nested_fields) => match &from.data_type {
DataType::Struct(from_nested_fields) => {
for from_field in from_nested_fields {
let mut is_new_field = true;
for self_field in nested_fields.iter_mut() {
if self_field.name != from_field.name {
continue;
}
is_new_field = false;
self_field.try_merge(from_field)?;
}
if is_new_field {
nested_fields.push(from_field.clone());
}
}
}
_ => {
return Err(ArrowError::InvalidArgumentError(
"Fail to merge schema Field due to conflicting datatype".to_string(),
));
}
},
DataType::Union(nested_fields, _, _) => match &from.data_type {
DataType::Union(from_nested_fields, _, _) => {
for from_field in from_nested_fields {
let mut is_new_field = true;
for self_field in nested_fields.iter_mut() {
if from_field == self_field {
is_new_field = false;
break;
}
}
if is_new_field {
nested_fields.push(from_field.clone());
}
}
}
_ => {
return Err(ArrowError::InvalidArgumentError(
"Fail to merge schema Field due to conflicting datatype".to_string(),
));
}
},
DataType::Null
| DataType::Boolean
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float16
| DataType::Float32
| DataType::Float64
| DataType::Timestamp(_, _)
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::Duration(_)
| DataType::Binary
| DataType::LargeBinary
| DataType::Interval(_)
| DataType::LargeList(_)
| DataType::List(_)
| DataType::Dictionary(_, _)
| DataType::FixedSizeList(_, _)
| DataType::FixedSizeBinary(_)
| DataType::Utf8
| DataType::LargeUtf8
| DataType::Extension(_, _, _)
| DataType::Map(_, _)
| DataType::Decimal(_, _) => {
if self.data_type != from.data_type {
return Err(ArrowError::InvalidArgumentError(
"Fail to merge schema Field due to conflicting datatype".to_string(),
));
}
}
}
if from.nullable {
self.nullable = from.nullable;
}
Ok(())
}
}
impl std::fmt::Display for Field {
    /// Renders the field using its `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Format through fresh arguments rather than `Debug::fmt(self, f)` so
        // that flags on `f` (alternate, width, ...) are not forwarded.
        f.write_fmt(format_args!("{:?}", self))
    }
}