Skip to content

Commit

Permalink
re_format: barebone support for custom formatting (rerun-io#1776)
Browse files Browse the repository at this point in the history
* implement barebone support for custom formatting and apply to Tuid

* unwrap

* <ERR> rather than []

* use re_tuid
  • Loading branch information
teh-cmc authored Apr 6, 2023
1 parent d6cce1c commit 3be747c
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 31 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion crates/re_arrow_store/src/arrow_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ impl ArrayExt for dyn Array {
///
/// Nested types are expanded and cleaned recursively
fn clean_for_polars(&self) -> Box<dyn Array> {
match self.data_type() {
let datatype = self.data_type();
let datatype = if let DataType::Extension(_, inner, _) = datatype {
(**inner).clone()
} else {
datatype.clone()
};

match &datatype {
DataType::List(field) => {
// Recursively clean the contents
let typed_arr = self.as_any().downcast_ref::<ListArray<i32>>().unwrap();
Expand Down
4 changes: 3 additions & 1 deletion crates/re_format/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ version.workspace = true
[package.metadata.docs.rs]
all-features = true


[dependencies]
arrow2.workspace = true
arrow2_convert.workspace = true
comfy-table.workspace = true
parking_lot.workspace = true
re_tuid.workspace = true
93 changes: 85 additions & 8 deletions crates/re_format/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,91 @@
use std::fmt::Formatter;

use arrow2::{
array::{get_display, Array},
array::{get_display, Array, ListArray, StructArray},
datatypes::{DataType, IntervalUnit, TimeUnit},
};
use arrow2_convert::deserialize::TryIntoCollection;
use comfy_table::{presets, Cell, Table};

use re_tuid::Tuid;

// ---

// TODO(#1775): Registering custom formatters should be done from other crates:
// A) Because `re_format` cannot depend on other crates (cyclic deps)
// B) Because how to deserialize and inspect some type is a private implementation detail of that
// type, re_format shouldn't know how to deserialize a TUID...

type CustomFormatter<'a, F> = Box<dyn Fn(&mut F, usize) -> std::fmt::Result + 'a>;

/// Returns a per-row formatter for `array`, using a custom one when the array's extension type
/// is recognized and falling back to arrow2's `get_display` otherwise.
///
/// The returned closure takes a writer and a row index and writes that row's representation.
/// `null` is forwarded to `get_display` for the fallback path; `_column_name` is currently unused.
pub fn get_custom_display<'a, F: std::fmt::Write + 'a>(
    _column_name: &'a str,
    array: &'a dyn Array,
    null: &'static str,
) -> CustomFormatter<'a, F> {
    // NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're
    // interested in the type of the array that's underneath, which we read off the first row.
    let inner_datatype = match array.data_type().to_logical_type() {
        DataType::List(_) => array
            .as_any()
            .downcast_ref::<ListArray<i32>>()
            .and_then(|list| list.iter().next())
            .flatten()
            .map(|first_row| first_row.data_type().clone()),
        _ => Some(array.data_type().clone()),
    };

    // TODO(#1775): This should be registered dynamically.
    // NOTE: Can't call `Tuid::name()`, `Component` lives in `re_log_types`.
    let is_tuid = matches!(
        &inner_datatype,
        Some(DataType::Extension(name, _, _)) if name.as_str() == "rerun.tuid"
    );

    // Anything that isn't a known extension type gets the stock arrow2 formatter.
    if !is_tuid {
        return get_display(array, null);
    }

    Box::new(move |w: &mut F, index: usize| match parse_tuid(array, index) {
        Some(tuid) => w.write_fmt(format_args!("{tuid}")),
        None => w.write_str("<ERR>"),
    })
}

// TODO(#1775): This should be defined and registered by the `re_tuid` crate.
/// Extracts the `Tuid` found at row `index` of `array`, or `None` if the array doesn't have the
/// expected physical layout or fails to deserialize.
fn parse_tuid(array: &dyn Array, index: usize) -> Option<Tuid> {
    let is_list = matches!(array.data_type().to_logical_type(), DataType::List(_));

    let (values, row) = if is_list {
        // Legacy MsgId lists: just grab the first value, they're all identical
        let list = array.as_any().downcast_ref::<ListArray<i32>>()?;
        (list.value(index), 0)
    } else {
        // New control columns: it's not a list to begin with!
        (array.to_boxed(), index)
    };

    // Bail out unless the underlying values are physically stored as a struct array.
    let structs = values.as_any().downcast_ref::<StructArray>()?;

    let tuids: Vec<Tuid> = TryIntoCollection::try_into_collection(structs.to_boxed()).ok()?;
    tuids.get(row).copied()
}

// ---

//TODO(john) move this and the Display impl upstream into arrow2
/// Newtype wrapper around arrow2's [`TimeUnit`] that carries a `Display` implementation
/// rendering the unit as a short suffix (`s`, `ms`, `us`, `ns`).
#[repr(transparent)]
pub struct DisplayTimeUnit(TimeUnit);

impl std::fmt::Display for DisplayTimeUnit {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let s = match self.0 {
arrow2::datatypes::TimeUnit::Second => "s",
arrow2::datatypes::TimeUnit::Millisecond => "ms",
arrow2::datatypes::TimeUnit::Microsecond => "us",
arrow2::datatypes::TimeUnit::Nanosecond => "ns",
TimeUnit::Second => "s",
TimeUnit::Millisecond => "ms",
TimeUnit::Microsecond => "us",
TimeUnit::Nanosecond => "ns",
};
f.write_str(s)
}
Expand Down Expand Up @@ -133,24 +202,32 @@ where
let mut table = Table::new();
table.load_preset(presets::UTF8_FULL);

let names = names
.into_iter()
.map(|name| name.as_ref().to_owned())
.collect::<Vec<_>>();
let arrays = columns.into_iter().collect::<Vec<_>>();

let (displayers, lengths): (Vec<_>, Vec<_>) = arrays
.iter()
.map(|array| (get_display(array.as_ref(), "-"), array.as_ref().len()))
.zip(names.iter())
.map(|(array, name)| {
let formatter = get_custom_display(name, array.as_ref(), "-");
(formatter, array.as_ref().len())
})
.unzip();

if displayers.is_empty() {
return table;
}

let header = names
.into_iter()
.iter()
.zip(arrays.iter().map(|array| array.as_ref().data_type()))
.map(|(name, data_type)| {
Cell::new(format!(
"{}\n---\n{}",
name.as_ref(),
name,
DisplayDataType(data_type.clone())
))
});
Expand Down
4 changes: 2 additions & 2 deletions crates/re_log_types/src/component_types/msg_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ use crate::{Component, ComponentName};
/// # use arrow2::datatypes::{DataType, Field};
/// assert_eq!(
/// MsgId::data_type(),
/// DataType::Struct(vec![
/// DataType::Extension("rerun.tuid".into(), Box::new(DataType::Struct(vec![
/// Field::new("time_ns", DataType::UInt64, false),
/// Field::new("inc", DataType::UInt64, false),
/// ])
/// ])), None),
/// );
/// ```
#[derive(
Expand Down
49 changes: 34 additions & 15 deletions crates/re_log_types/src/data_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,15 @@ impl std::ops::IndexMut<usize> for DataCellColumn {
///
/// The table above translates to the following, where each column is contiguous in memory:
/// ```text
/// ┌──────────────────────────────────────────────────────────┬────────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐
/// │ rerun.row_id ┆ rerun.timepoint ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │
/// ╞══════════════════════════════════════════════════════════╪════════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 1}] ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 2}] ┆ b ┆ 0 ┆ - ┆ - ┆ [] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 2}, {clock, 1, 1}] ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │
/// └──────────────────────────────────────────────────────────┴────────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘
/// ┌───────────────────────────────────────────────────────────────────────────┬───────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐
/// │ frame_nr ┆ log_time ┆ rerun.row_id ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │
/// ╞═══════════════════════════════════════════════════════════════════════════╪═══════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡
/// │ 1 ┆ 2023-04-05 09:36:47.188796402 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ 1 ┆ 2023-04-05 09:36:47.188852222 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ b ┆ 0 ┆ - ┆ - ┆ [] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ 2 ┆ 2023-04-05 09:36:47.188855872 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │
/// └───────────────────────────────────────────────────────────────────────────┴───────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘
/// ```
///
/// ## Example
Expand Down Expand Up @@ -533,8 +533,6 @@ impl DataTable {
[(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into(),
);

// TODO(cmc): why do we have to do this manually on the way out, but it's done
// automatically on our behalf on the way in...?
if let DataType::Extension(name, _, _) = data.data_type() {
field
.metadata
Expand Down Expand Up @@ -627,15 +625,20 @@ impl DataTable {
.map(|cell| cell.as_arrow_ref())
.collect_vec();

let ext_name = cell_refs.first().and_then(|cell| match cell.data_type() {
DataType::Extension(name, _, _) => Some(name),
_ => None,
});

// NOTE: Avoid paying for the cost of the concatenation machinery if there's a single
// row in the column.
let data = if cell_refs.len() == 1 {
data_to_lists(column, cell_refs[0].to_boxed())
data_to_lists(column, cell_refs[0].to_boxed(), ext_name.cloned())
} else {
// NOTE: This is a column of cells, it shouldn't ever fail to concatenate since
// they share the same underlying type.
let data = arrow2::compute::concatenate::concatenate(cell_refs.as_slice())?;
data_to_lists(column, data)
data_to_lists(column, data, ext_name.cloned())
};

let field = Field::new(name, data.data_type().clone(), false)
Expand All @@ -648,10 +651,26 @@ impl DataTable {
///
/// * Before: `[C, C, C, C, C, C, C, ...]`
/// * After: `ListArray[ [[C, C], [C, C, C], None, [C], [C], ...] ]`
fn data_to_lists(column: &[Option<DataCell>], data: Box<dyn Array>) -> Box<dyn Array> {
fn data_to_lists(
column: &[Option<DataCell>],
data: Box<dyn Array>,
ext_name: Option<String>,
) -> Box<dyn Array> {
let datatype = data.data_type().clone();

let datatype = ListArray::<i32>::default_datatype(datatype);
let field = {
let mut field = Field::new("item", datatype, true);

if let Some(name) = ext_name {
field
.metadata
.extend([("ARROW:extension:name".to_owned(), name)]);
}

field
};

let datatype = DataType::List(Box::new(field));
let offsets = Offsets::try_from_lengths(column.iter().map(|cell| {
cell.as_ref()
.map_or(0, |cell| cell.num_instances() as usize)
Expand Down
28 changes: 24 additions & 4 deletions crates/re_tuid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
#![doc = document_features::document_features!()]
//!
use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
use arrow2::datatypes::DataType;
use arrow2_convert::{ArrowDeserialize, ArrowSerialize};

#[derive(
Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowField, ArrowSerialize, ArrowDeserialize,
)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowSerialize, ArrowDeserialize)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Tuid {
/// Approximate nanoseconds since epoch.
Expand All @@ -21,6 +20,27 @@ pub struct Tuid {
inc: u64,
}

arrow2_convert::arrow_enable_vec_for_type!(Tuid);

// TODO(#1774): shouldn't have to write this manually
impl arrow2_convert::field::ArrowField for Tuid {
type Type = Self;

fn data_type() -> arrow2::datatypes::DataType {
let datatype = arrow2::datatypes::DataType::Struct(<[_]>::into_vec(Box::new([
<u64 as arrow2_convert::field::ArrowField>::field("time_ns"),
<u64 as arrow2_convert::field::ArrowField>::field("inc"),
])));
DataType::Extension("rerun.tuid".into(), Box::new(datatype), None)
}
}

impl std::fmt::Display for Tuid {
    /// Writes the id as a zero-padded, 32-digit, upper-case hexadecimal number.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value = self.as_u128();
        f.write_fmt(format_args!("{value:032X}"))
    }
}

impl std::fmt::Debug for Tuid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:032X}", self.as_u128())
Expand Down

0 comments on commit 3be747c

Please sign in to comment.