diff --git a/Cargo.lock b/Cargo.lock index 6fe5c47177ba..331040f76391 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3861,7 +3861,10 @@ name = "re_format" version = "0.4.0" dependencies = [ "arrow2", + "arrow2_convert", "comfy-table 6.1.4", + "parking_lot 0.12.1", + "re_tuid", ] [[package]] diff --git a/crates/re_arrow_store/src/arrow_util.rs b/crates/re_arrow_store/src/arrow_util.rs index d54d08fd26b4..ef119bd51b31 100644 --- a/crates/re_arrow_store/src/arrow_util.rs +++ b/crates/re_arrow_store/src/arrow_util.rs @@ -51,7 +51,14 @@ impl ArrayExt for dyn Array { /// /// Nested types are expanded and cleaned recursively fn clean_for_polars(&self) -> Box { - match self.data_type() { + let datatype = self.data_type(); + let datatype = if let DataType::Extension(_, inner, _) = datatype { + (**inner).clone() + } else { + datatype.clone() + }; + + match &datatype { DataType::List(field) => { // Recursively clean the contents let typed_arr = self.as_any().downcast_ref::>().unwrap(); diff --git a/crates/re_format/Cargo.toml b/crates/re_format/Cargo.toml index de103832902e..9c9525b85e3c 100644 --- a/crates/re_format/Cargo.toml +++ b/crates/re_format/Cargo.toml @@ -15,7 +15,9 @@ version.workspace = true [package.metadata.docs.rs] all-features = true - [dependencies] arrow2.workspace = true +arrow2_convert.workspace = true comfy-table.workspace = true +parking_lot.workspace = true +re_tuid.workspace = true diff --git a/crates/re_format/src/arrow.rs b/crates/re_format/src/arrow.rs index 5ac404970522..fcc8a4133cee 100644 --- a/crates/re_format/src/arrow.rs +++ b/crates/re_format/src/arrow.rs @@ -3,11 +3,80 @@ use std::fmt::Formatter; use arrow2::{ - array::{get_display, Array}, + array::{get_display, Array, ListArray, StructArray}, datatypes::{DataType, IntervalUnit, TimeUnit}, }; +use arrow2_convert::deserialize::TryIntoCollection; use comfy_table::{presets, Cell, Table}; +use re_tuid::Tuid; + +// --- + +// TODO(#1775): Registering custom formatters should be done from other crates: +// A) Because `re_format` cannot depend on other crates (cyclic deps) +// B) Because how to deserialize and inspect some type is a private implementation detail of that +// type, re_format shouldn't know how to deserialize a TUID... + +type CustomFormatter<'a, F> = Box std::fmt::Result + 'a>; + +pub fn get_custom_display<'a, F: std::fmt::Write + 'a>( + _column_name: &'a str, + array: &'a dyn Array, + null: &'static str, +) -> CustomFormatter<'a, F> { + // NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're + // interested in the type of the array that's underneath. + let datatype = (|| match array.data_type().to_logical_type() { + DataType::List(_) => array + .as_any() + .downcast_ref::>()? + .iter() + .next()? + .map(|array| array.data_type().clone()), + _ => Some(array.data_type().clone()), + })(); + + if let Some(DataType::Extension(name, _, _)) = datatype { + match name.as_str() { + // TODO(#1775): This should be registered dynamically. + // NOTE: Can't call `Tuid::name()`, `Component` lives in `re_log_types`. + "rerun.tuid" => Box::new(|w, index| { + if let Some(tuid) = parse_tuid(array, index) { + w.write_fmt(format_args!("{tuid}")) + } else { + w.write_str("") + } + }), + _ => get_display(array, null), + } + } else { + get_display(array, null) + } +} + +// TODO(#1775): This should be defined and registered by the `re_tuid` crate. +fn parse_tuid(array: &dyn Array, index: usize) -> Option { + let (array, index) = match array.data_type().to_logical_type() { + // Legacy MsgId lists: just grab the first value, they're all identical + DataType::List(_) => ( + array + .as_any() + .downcast_ref::>()? + .value(index), + 0, + ), + // New control columns: it's not a list to begin with! + _ => (array.to_boxed(), index), + }; + let array = array.as_any().downcast_ref::()?; + + let tuids: Vec = TryIntoCollection::try_into_collection(array.to_boxed()).ok()?; + tuids.get(index).copied() +} + +// --- + //TODO(john) move this and the Display impl upstream into arrow2 #[repr(transparent)] pub struct DisplayTimeUnit(TimeUnit); @@ -15,10 +84,10 @@ pub struct DisplayTimeUnit(TimeUnit); impl std::fmt::Display for DisplayTimeUnit { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let s = match self.0 { - arrow2::datatypes::TimeUnit::Second => "s", - arrow2::datatypes::TimeUnit::Millisecond => "ms", - arrow2::datatypes::TimeUnit::Microsecond => "us", - arrow2::datatypes::TimeUnit::Nanosecond => "ns", + TimeUnit::Second => "s", + TimeUnit::Millisecond => "ms", + TimeUnit::Microsecond => "us", + TimeUnit::Nanosecond => "ns", }; f.write_str(s) } @@ -133,11 +202,19 @@ where let mut table = Table::new(); table.load_preset(presets::UTF8_FULL); + let names = names + .into_iter() + .map(|name| name.as_ref().to_owned()) + .collect::>(); let arrays = columns.into_iter().collect::>(); let (displayers, lengths): (Vec<_>, Vec<_>) = arrays .iter() - .map(|array| (get_display(array.as_ref(), "-"), array.as_ref().len())) + .zip(names.iter()) + .map(|(array, name)| { + let formatter = get_custom_display(name, array.as_ref(), "-"); + (formatter, array.as_ref().len()) + }) .unzip(); if displayers.is_empty() { @@ -145,12 +222,12 @@ where } let header = names - .into_iter() + .iter() .zip(arrays.iter().map(|array| array.as_ref().data_type())) .map(|(name, data_type)| { Cell::new(format!( "{}\n---\n{}", - name.as_ref(), + name, DisplayDataType(data_type.clone()) )) }); diff --git a/crates/re_log_types/src/component_types/msg_id.rs b/crates/re_log_types/src/component_types/msg_id.rs index 104f444cac09..8ab6a59da1f5 100644 --- a/crates/re_log_types/src/component_types/msg_id.rs +++ b/crates/re_log_types/src/component_types/msg_id.rs @@ -12,10 +12,10 @@ use crate::{Component, ComponentName}; /// # use arrow2::datatypes::{DataType, Field}; /// assert_eq!( /// MsgId::data_type(), -/// DataType::Struct(vec![ +/// DataType::Extension("rerun.tuid".into(), Box::new(DataType::Struct(vec![ /// Field::new("time_ns", DataType::UInt64, false), /// Field::new("inc", DataType::UInt64, false), -/// ]) +/// ])), None), /// ); /// ``` #[derive( diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index 3ae3796288fb..49eccc0a43a9 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -150,15 +150,15 @@ impl std::ops::IndexMut for DataCellColumn { /// /// The table above translates to the following, where each column is contiguous in memory: /// ```text -/// ┌───────────────────────┬───────────────────────────────────┬────────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐ -/// │ rerun.row_id ┆ rerun.timepoint ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │ -/// ╞═══════════════════════╪═══════════════════════════════════╪════════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡ -/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 1}] ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │ -/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 2}] ┆ b ┆ 0 ┆ - ┆ - ┆ [] │ -/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 2}, {clock, 1, 1}] ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │ -/// └───────────────────────┴───────────────────────────────────┴────────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘ +/// ┌──────────┬───────────────────────────────┬──────────────────────────────────┬───────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐ +/// │ frame_nr ┆ log_time ┆ rerun.row_id ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │ +/// ╞══════════╪═══════════════════════════════╪══════════════════════════════════╪═══════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡ +/// │ 1 ┆ 2023-04-05 09:36:47.188796402 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │ +/// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +/// │ 1 ┆ 2023-04-05 09:36:47.188852222 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ b ┆ 0 ┆ - ┆ - ┆ [] │ +/// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +/// │ 2 ┆ 2023-04-05 09:36:47.188855872 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │ +/// └──────────┴───────────────────────────────┴──────────────────────────────────┴───────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘ /// ``` /// /// ## Example @@ -533,8 +533,6 @@ impl DataTable { [(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into(), ); - // TODO(cmc): why do we have to do this manually on the way out, but it's done - // automatically on our behalf on the way in...? if let DataType::Extension(name, _, _) = data.data_type() { field .metadata @@ -627,15 +625,20 @@ impl DataTable { .map(|cell| cell.as_arrow_ref()) .collect_vec(); + let ext_name = cell_refs.first().and_then(|cell| match cell.data_type() { + DataType::Extension(name, _, _) => Some(name), + _ => None, + }); + // NOTE: Avoid paying for the cost of the concatenation machinery if there's a single // row in the column. let data = if cell_refs.len() == 1 { - data_to_lists(column, cell_refs[0].to_boxed()) + data_to_lists(column, cell_refs[0].to_boxed(), ext_name.cloned()) } else { // NOTE: This is a column of cells, it shouldn't ever fail to concatenate since // they share the same underlying type. let data = arrow2::compute::concatenate::concatenate(cell_refs.as_slice())?; - data_to_lists(column, data) + data_to_lists(column, data, ext_name.cloned()) }; let field = Field::new(name, data.data_type().clone(), false) @@ -648,10 +651,26 @@ impl DataTable { /// /// * Before: `[C, C, C, C, C, C, C, ...]` /// * After: `ListArray[ [[C, C], [C, C, C], None, [C], [C], ...] ]` - fn data_to_lists(column: &[Option], data: Box) -> Box { + fn data_to_lists( + column: &[Option], + data: Box, + ext_name: Option, + ) -> Box { let datatype = data.data_type().clone(); - let datatype = ListArray::::default_datatype(datatype); + let field = { + let mut field = Field::new("item", datatype, true); + + if let Some(name) = ext_name { + field + .metadata + .extend([("ARROW:extension:name".to_owned(), name)]); + } + + field + }; + + let datatype = DataType::List(Box::new(field)); let offsets = Offsets::try_from_lengths(column.iter().map(|cell| { cell.as_ref() .map_or(0, |cell| cell.num_instances() as usize) diff --git a/crates/re_tuid/src/lib.rs b/crates/re_tuid/src/lib.rs index 49c0840dad3d..072b65261da5 100644 --- a/crates/re_tuid/src/lib.rs +++ b/crates/re_tuid/src/lib.rs @@ -6,11 +6,10 @@ #![doc = document_features::document_features!()] //! -use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; +use arrow2::datatypes::DataType; +use arrow2_convert::{ArrowDeserialize, ArrowSerialize}; -#[derive( - Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowField, ArrowSerialize, ArrowDeserialize, -)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowSerialize, ArrowDeserialize)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Tuid { /// Approximate nanoseconds since epoch. @@ -21,6 +20,27 @@ pub struct Tuid { inc: u64, } +arrow2_convert::arrow_enable_vec_for_type!(Tuid); + +// TODO(#1774): shouldn't have to write this manually +impl arrow2_convert::field::ArrowField for Tuid { + type Type = Self; + + fn data_type() -> arrow2::datatypes::DataType { + let datatype = arrow2::datatypes::DataType::Struct(<[_]>::into_vec(Box::new([ + ::field("time_ns"), + ::field("inc"), + ]))); + DataType::Extension("rerun.tuid".into(), Box::new(datatype), None) + } +} + +impl std::fmt::Display for Tuid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:032X}", self.as_u128()) + } +} + impl std::fmt::Debug for Tuid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:032X}", self.as_u128())