use ahash::HashSetExt;
use nohash_hasher::IntSet;
use smallvec::SmallVec;
use re_types_core::{AsComponents, ComponentName, SizeBytes};
use crate::{DataCell, DataCellError, DataTable, EntityPath, NumInstances, TableId, TimePoint};
/// Errors describing a set of cells that does not form a valid row.
///
/// Both variants are produced by `DataRow::from_cells` while it validates the cells it is given.
#[derive(thiserror::Error, Debug)]
pub enum DataReadError {
/// A cell holds a number of instances that is neither 0, 1, nor the row's `num_instances`.
#[error(
"Each cell must contain either 0, 1 or `num_instances` instances, \
but cell '{component}' in '{entity_path}' holds {num_instances} instances \
(expected {expected_num_instances})"
)]
WrongNumberOfInstances {
// Entity path of the row being built.
entity_path: EntityPath,
// Component name of the offending cell.
component: ComponentName,
// The `num_instances` the row was declared with.
expected_num_instances: u32,
// The number of instances actually held by the offending cell.
num_instances: u32,
},
/// Two cells of the same row share the same component name.
#[error(
"Same component type present multiple times within a single row: \
'{component}' in '{entity_path}'"
)]
DupedComponent {
// Entity path of the row being built.
entity_path: EntityPath,
// The component name that was seen more than once.
component: ComponentName,
},
}
/// Shorthand for a `Result` whose error type is [`DataReadError`].
pub type DataReadResult<T> = Result<T, DataReadError>;
#[derive(thiserror::Error, Debug)]
pub enum DataRowError {
#[error(transparent)]
DataRead(#[from] DataReadError),
#[error("Error with one or more the underlying data cells: {0}")]
DataCell(#[from] DataCellError),
#[error("Could not serialize/deserialize data to/from Arrow: {0}")]
Arrow(#[from] arrow2::error::Error),
#[error("Infallible")]
Unreachable(#[from] std::convert::Infallible),
}
/// Shorthand for a `Result` whose error type is [`DataRowError`].
pub type DataRowResult<T> = Result<T, DataRowError>;
/// A `SmallVec` of [`DataCell`]s with an inline capacity of 4 cells.
pub type DataCellVec = SmallVec<[DataCell; 4]>;
/// Newtype around a [`DataCellVec`]: the cells that make up one row.
///
/// Dereferences to (and can be indexed like) a slice of [`DataCell`]s.
#[derive(Debug, Clone, PartialEq)]
pub struct DataCellRow(pub DataCellVec);
impl std::ops::Deref for DataCellRow {
    type Target = [DataCell];

    /// Exposes the wrapped cells as an immutable slice.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.0.as_slice()
    }
}
impl std::ops::DerefMut for DataCellRow {
    /// Exposes the wrapped cells as a mutable slice.
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.0.as_mut_slice()
    }
}
impl std::ops::Index<usize> for DataCellRow {
    type Output = DataCell;

    /// Returns the cell at position `index`, delegating to the indexing of the wrapped vector.
    #[inline]
    fn index(&self, index: usize) -> &Self::Output {
        std::ops::Index::index(&self.0, index)
    }
}
impl std::ops::IndexMut<usize> for DataCellRow {
    /// Returns the cell at position `index` mutably, delegating to the indexing of the wrapped
    /// vector.
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        std::ops::IndexMut::index_mut(&mut self.0, index)
    }
}
impl SizeBytes for DataCellRow {
    /// Reports the heap size of the wrapped vector of cells.
    #[inline]
    fn heap_size_bytes(&self) -> u64 {
        let Self(cells) = self;
        cells.heap_size_bytes()
    }
}
/// Identifier newtype wrapping a `re_tuid::Tuid`.
///
/// Serde support is only derived when the `serde` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct RowId(pub(crate) re_tuid::Tuid);
impl std::fmt::Display for RowId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl RowId {
pub const ZERO: Self = Self(re_tuid::Tuid::ZERO);
#[inline]
pub fn random() -> Self {
Self(re_tuid::Tuid::random())
}
#[inline]
pub fn next(&self) -> Self {
Self(self.0.next())
}
}
impl SizeBytes for RowId {
    /// Always reports zero heap bytes for a `RowId`.
    #[inline]
    fn heap_size_bytes(&self) -> u64 {
        0_u64
    }
}
impl std::ops::Deref for RowId {
type Target = re_tuid::Tuid;
#[inline]
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::ops::DerefMut for RowId {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
// Invokes `re_tuid::delegate_arrow_tuid!` for `RowId` under the name "rerun.controls.RowId".
// NOTE(review): presumably this generates the Arrow (de)serialization glue for `RowId` by
// delegating to the wrapped `Tuid` — confirm against the macro definition.
re_tuid::delegate_arrow_tuid!(RowId as "rerun.controls.RowId");
/// A single row: a set of cells logged for one entity path at one timepoint.
///
/// `DataRow::from_cells` validates that no two cells share a component name and that every cell
/// holds 0, 1 or `num_instances` instances. The fields are public, so code that mutates them
/// directly bypasses that validation.
#[derive(Debug, Clone)]
pub struct DataRow {
// Identifier of this row.
pub row_id: RowId,
// The timepoint associated with this row.
pub timepoint: TimePoint,
// The entity path associated with this row.
pub entity_path: EntityPath,
// The instance count the row was declared with; see the validation rule above.
pub num_instances: NumInstances,
// The cells of the row.
pub cells: DataCellRow,
}
impl DataRow {
    /// Builds a row from all the component batches exposed by `as_components`.
    ///
    /// Thin wrapper around [`Self::from_component_batches`]; any error it reports is returned
    /// unchanged.
    pub fn from_archetype(
        row_id: RowId,
        timepoint: TimePoint,
        entity_path: EntityPath,
        as_components: &dyn AsComponents,
    ) -> anyhow::Result<Self> {
        re_tracing::profile_function!();

        let owned_batches = as_components.as_component_batches();
        Self::from_component_batches(
            row_id,
            timepoint,
            entity_path,
            owned_batches.iter().map(|owned| owned.as_ref()),
        )
    }

    /// Builds a row from a sequence of component batches.
    ///
    /// Each batch is turned into a [`DataCell`]; the row's `num_instances` is the largest
    /// instance count among those cells (0 when there are none). The resulting row goes
    /// through [`Self::from_cells`] and then [`Self::compute_all_size_bytes`].
    ///
    /// # Errors
    ///
    /// Fails on the first batch that cannot be converted into a cell, or when
    /// [`Self::from_cells`] rejects the cells.
    pub fn from_component_batches<'a>(
        row_id: RowId,
        timepoint: TimePoint,
        entity_path: EntityPath,
        comp_batches: impl IntoIterator<Item = &'a dyn re_types_core::ComponentBatch>,
    ) -> anyhow::Result<Self> {
        re_tracing::profile_function!();

        // Stop at the first batch that fails to convert.
        let mut cells: Vec<DataCell> = Vec::new();
        for batch in comp_batches {
            cells.push(DataCell::from_component_batch(batch)?);
        }

        let num_instances = cells
            .iter()
            .map(|cell| cell.num_instances())
            .max()
            .unwrap_or(0);

        let mut row = Self::from_cells(row_id, timepoint, entity_path, num_instances, cells)?;
        row.compute_all_size_bytes();
        Ok(row)
    }

    /// Builds a row from already constructed cells, validating them along the way.
    ///
    /// # Errors
    ///
    /// Cells are checked in order, and the first violation found is returned:
    /// - [`DataReadError::DupedComponent`] if a cell's component name was already seen;
    /// - [`DataReadError::WrongNumberOfInstances`] if a cell holds a number of instances that
    ///   is neither 0, 1, nor `num_instances`.
    pub fn from_cells(
        row_id: RowId,
        timepoint: impl Into<TimePoint>,
        entity_path: impl Into<EntityPath>,
        num_instances: u32,
        cells: impl IntoIterator<Item = DataCell>,
    ) -> DataReadResult<Self> {
        let cells = DataCellRow(cells.into_iter().collect());
        let entity_path: EntityPath = entity_path.into();
        let timepoint: TimePoint = timepoint.into();

        let mut seen = IntSet::with_capacity(cells.len());
        for cell in cells.iter() {
            let component = cell.component_name();

            let first_occurrence = seen.insert(component);
            if !first_occurrence {
                return Err(DataReadError::DupedComponent {
                    entity_path,
                    component,
                });
            }

            // Counts of 0 and 1 are always accepted; anything else must match the row.
            let n = cell.num_instances();
            if n > 1 && n != num_instances {
                return Err(DataReadError::WrongNumberOfInstances {
                    entity_path,
                    component,
                    expected_num_instances: num_instances,
                    num_instances: n,
                });
            }
        }

        Ok(Self {
            row_id,
            entity_path,
            timepoint,
            num_instances: num_instances.into(),
            cells,
        })
    }

    /// Wraps this single row into a [`DataTable`] with a random [`TableId`].
    #[inline]
    pub fn into_table(self) -> DataTable {
        let table_id = TableId::random();
        DataTable::from_rows(table_id, [self])
    }
}
impl SizeBytes for DataRow {
    /// Sums the heap sizes reported by every field of the row.
    fn heap_size_bytes(&self) -> u64 {
        // Exhaustive destructuring: a field added to `DataRow` but not accounted for here
        // fails to compile.
        let Self {
            row_id,
            timepoint,
            entity_path,
            num_instances,
            cells,
        } = self;

        let per_field = [
            row_id.heap_size_bytes(),
            timepoint.heap_size_bytes(),
            entity_path.heap_size_bytes(),
            num_instances.heap_size_bytes(),
            cells.heap_size_bytes(),
        ];
        per_field.iter().sum()
    }
}
impl DataRow {
#[inline]
pub fn row_id(&self) -> RowId {
self.row_id
}
#[inline]
pub fn timepoint(&self) -> &TimePoint {
&self.timepoint
}
#[inline]
pub fn entity_path(&self) -> &EntityPath {
&self.entity_path
}
#[inline]
pub fn num_cells(&self) -> usize {
self.cells.len()
}
#[inline]
pub fn component_names(&self) -> impl ExactSizeIterator<Item = ComponentName> + '_ {
self.cells.iter().map(|cell| cell.component_name())
}
#[inline]
pub fn num_instances(&self) -> NumInstances {
self.num_instances
}
#[inline]
pub fn cells(&self) -> &DataCellRow {
&self.cells
}
#[inline]
pub fn into_cells(self) -> DataCellRow {
self.cells
}
#[inline]
pub fn find_cell(&self, component: &ComponentName) -> Option<usize> {
self.cells
.iter()
.map(|cell| cell.component_name())
.position(|name| name == *component)
}
#[inline]
pub fn compute_all_size_bytes(&mut self) {
for cell in &mut self.cells.0 {
cell.compute_size_bytes();
}
}
}
impl DataRow {
    /// Builds a single-cell row via [`Self::from_cells`], then runs
    /// [`Self::compute_all_size_bytes`] on it.
    ///
    /// Note the argument order: `entity_path` comes before `timepoint` here.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::from_cells`] reports.
    pub fn from_cells1_sized<C0>(
        row_id: RowId,
        entity_path: impl Into<EntityPath>,
        timepoint: impl Into<TimePoint>,
        num_instances: u32,
        into_cells: C0,
    ) -> DataReadResult<DataRow>
    where
        C0: Into<DataCell>,
    {
        let timepoint: TimePoint = timepoint.into();
        let entity_path: EntityPath = entity_path.into();
        let cell: DataCell = into_cells.into();

        let mut row = Self::from_cells(row_id, timepoint, entity_path, num_instances, [cell])?;
        row.compute_all_size_bytes();
        Ok(row)
    }

    /// Builds a single-cell row via [`Self::from_cells`], using a fallible cell conversion.
    ///
    /// Note the argument order: `entity_path` comes before `timepoint` here.
    ///
    /// # Errors
    ///
    /// Returns the conversion error of `into_cells`, or whatever [`Self::from_cells`] reports,
    /// both converted into a [`DataRowError`].
    pub fn from_cells1<C0>(
        row_id: RowId,
        entity_path: impl Into<EntityPath>,
        timepoint: impl Into<TimePoint>,
        num_instances: u32,
        into_cells: C0,
    ) -> DataRowResult<DataRow>
    where
        C0: TryInto<DataCell>,
        DataRowError: From<<C0 as TryInto<DataCell>>::Error>,
    {
        let timepoint: TimePoint = timepoint.into();
        let entity_path: EntityPath = entity_path.into();
        let cell: DataCell = into_cells.try_into()?;

        let row = Self::from_cells(row_id, timepoint, entity_path, num_instances, [cell])?;
        Ok(row)
    }

    /// Builds a two-cell row via [`Self::from_cells`], then runs
    /// [`Self::compute_all_size_bytes`] on it.
    ///
    /// Note the argument order: `entity_path` comes before `timepoint` here.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::from_cells`] reports, converted into a [`DataRowError`].
    pub fn from_cells2_sized<C0, C1>(
        row_id: RowId,
        entity_path: impl Into<EntityPath>,
        timepoint: impl Into<TimePoint>,
        num_instances: u32,
        into_cells: (C0, C1),
    ) -> DataRowResult<DataRow>
    where
        C0: Into<DataCell>,
        C1: Into<DataCell>,
    {
        let timepoint: TimePoint = timepoint.into();
        let entity_path: EntityPath = entity_path.into();
        let (first, second) = into_cells;
        let first: DataCell = first.into();
        let second: DataCell = second.into();

        let mut row = Self::from_cells(
            row_id,
            timepoint,
            entity_path,
            num_instances,
            [first, second],
        )?;
        row.compute_all_size_bytes();
        Ok(row)
    }

    /// Builds a two-cell row via [`Self::from_cells`], using fallible cell conversions.
    ///
    /// Note the argument order: `entity_path` comes before `timepoint` here.
    ///
    /// # Errors
    ///
    /// Returns the first conversion error encountered (cells are converted in tuple order), or
    /// whatever [`Self::from_cells`] reports, converted into a [`DataRowError`].
    pub fn from_cells2<C0, C1>(
        row_id: RowId,
        entity_path: impl Into<EntityPath>,
        timepoint: impl Into<TimePoint>,
        num_instances: u32,
        into_cells: (C0, C1),
    ) -> DataRowResult<DataRow>
    where
        C0: TryInto<DataCell>,
        C1: TryInto<DataCell>,
        DataRowError: From<<C0 as TryInto<DataCell>>::Error>,
        DataRowError: From<<C1 as TryInto<DataCell>>::Error>,
    {
        let timepoint: TimePoint = timepoint.into();
        let entity_path: EntityPath = entity_path.into();
        let (first, second) = into_cells;
        let first: DataCell = first.try_into()?;
        let second: DataCell = second.try_into()?;

        let row = Self::from_cells(
            row_id,
            timepoint,
            entity_path,
            num_instances,
            [first, second],
        )?;
        Ok(row)
    }

    /// Builds a three-cell row via [`Self::from_cells`], using fallible cell conversions.
    ///
    /// Note the argument order: `entity_path` comes before `timepoint` here.
    ///
    /// # Errors
    ///
    /// Returns the first conversion error encountered (cells are converted in tuple order), or
    /// whatever [`Self::from_cells`] reports, converted into a [`DataRowError`].
    pub fn from_cells3<C0, C1, C2>(
        row_id: RowId,
        entity_path: impl Into<EntityPath>,
        timepoint: impl Into<TimePoint>,
        num_instances: u32,
        into_cells: (C0, C1, C2),
    ) -> DataRowResult<DataRow>
    where
        C0: TryInto<DataCell>,
        C1: TryInto<DataCell>,
        C2: TryInto<DataCell>,
        DataRowError: From<<C0 as TryInto<DataCell>>::Error>,
        DataRowError: From<<C1 as TryInto<DataCell>>::Error>,
        DataRowError: From<<C2 as TryInto<DataCell>>::Error>,
    {
        let timepoint: TimePoint = timepoint.into();
        let entity_path: EntityPath = entity_path.into();
        let (first, second, third) = into_cells;
        let first: DataCell = first.try_into()?;
        let second: DataCell = second.try_into()?;
        let third: DataCell = third.try_into()?;

        let row = Self::from_cells(
            row_id,
            timepoint,
            entity_path,
            num_instances,
            [first, second, third],
        )?;
        Ok(row)
    }
}
impl std::fmt::Display for DataRow {
    /// Writes a header line with the row id and entity path, one line per timeline of the
    /// timepoint, and finally the table built by `re_format::arrow::format_table` from the
    /// row's cells and their component names.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            row_id,
            timepoint,
            entity_path,
            cells,
            ..
        } = self;

        writeln!(f, "Row #{} @ '{}'", row_id, entity_path)?;

        for (timeline, time) in timepoint {
            let timeline_name = timeline.name();
            let formatted_time = timeline.typ().format_utc(*time);
            writeln!(f, "- {}: {}", timeline_name, formatted_time)?;
        }

        let columns = cells.iter().map(|cell| cell.to_arrow_monolist());
        let headers = cells.iter().map(|cell| cell.component_name());
        re_format::arrow::format_table(columns, headers).fmt(f)
    }
}