Skip to content

Commit

Permalink
Allow a Column selector to be specified directly by a string (rerun-i…
Browse files Browse the repository at this point in the history
…o#7786)

### What
- Needing to use `rr.dataframe.ColumnSelector(...)` is a pain.
- Allow the syntax: "entity/path:Component"

Based on top of: 
- rerun-io#7761
to avoid conflicts. Rebase after merging 7761

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7786?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7786?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7786)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.

---------

Co-authored-by: Emil Ernerfeldt <[email protected]>
  • Loading branch information
jleibs and emilk authored Oct 17, 2024
1 parent fedc962 commit 8bb554b
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 35 deletions.
6 changes: 3 additions & 3 deletions docs/content/howto/dataframe-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ Rows where a specific column has null values may be filtered out using the `filt

```python
# only keep rows where a position is available for the robot
view = view.filter_is_not_null(rr.dataframe.ComponentColumnSelector("/world/robot", "Position3D"))
view = view.filter_is_not_null("/world/robot:Position3D")
```

### Specifying rows
Expand Down Expand Up @@ -155,8 +155,8 @@ record_batches = view.select()
# select only the specified columns
record_batches = view.select(
[
rr.dataframe.IndexColumnSelector("frame_nr"),
rr.dataframe.ComponentColumnSelector("/world/robot", "Position3D"),
"frame_nr",
"/world/robot:Position3D",
],
)
```
Expand Down
2 changes: 2 additions & 0 deletions rerun_py/rerun_bindings/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)

AnyColumn: TypeAlias = Union[
str,
"ComponentColumnDescriptor",
"ComponentColumnSelector",
"IndexColumnDescriptor",
Expand All @@ -27,6 +28,7 @@


AnyComponentColumn: TypeAlias = Union[
str,
"ComponentColumnDescriptor",
"ComponentColumnSelector",
]
Expand Down
78 changes: 60 additions & 18 deletions rerun_py/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
#![allow(clippy::borrow_deref_ref)] // False positive due to #[pyfunction] macro
#![allow(unsafe_op_in_unsafe_fn)] // False positive due to #[pyfunction] macro

use std::collections::{BTreeMap, BTreeSet};
use std::{
collections::{BTreeMap, BTreeSet},
str::FromStr as _,
};

use arrow::{
array::{make_array, Array, ArrayData, Int64Array, RecordBatchIterator, RecordBatchReader},
Expand Down Expand Up @@ -246,41 +249,74 @@ impl PyComponentColumnSelector {
/// A type alias for any column-like object: a plain string name, or an index or component column descriptor/selector.
#[derive(FromPyObject)]
enum AnyColumn {
#[pyo3(transparent, annotation = "time_descriptor")]
TimeDescriptor(PyIndexColumnDescriptor),
#[pyo3(transparent, annotation = "time_selector")]
TimeSelector(PyIndexColumnSelector),
#[pyo3(transparent, annotation = "name")]
Name(String),
#[pyo3(transparent, annotation = "index_descriptor")]
IndexDescriptor(PyIndexColumnDescriptor),
#[pyo3(transparent, annotation = "index_selector")]
IndexSelector(PyIndexColumnSelector),
#[pyo3(transparent, annotation = "component_descriptor")]
ComponentDescriptor(PyComponentColumnDescriptor),
#[pyo3(transparent, annotation = "component_selector")]
ComponentSelector(PyComponentColumnSelector),
}

impl AnyColumn {
fn into_selector(self) -> ColumnSelector {
fn into_selector(self) -> PyResult<ColumnSelector> {
match self {
Self::TimeDescriptor(desc) => ColumnDescriptor::Time(desc.0).into(),
Self::TimeSelector(selector) => selector.0.into(),
Self::ComponentDescriptor(desc) => ColumnDescriptor::Component(desc.0).into(),
Self::ComponentSelector(selector) => selector.0.into(),
Self::Name(name) => {
if !name.contains(':') && !name.contains('/') {
Ok(ColumnSelector::Time(TimeColumnSelector {
timeline: name.into(),
}))
} else {
let component_path =
re_log_types::ComponentPath::from_str(&name).map_err(|err| {
PyValueError::new_err(format!("Invalid component path {name:?}: {err}"))
})?;

Ok(ColumnSelector::Component(ComponentColumnSelector {
entity_path: component_path.entity_path,
component_name: component_path.component_name.to_string(),
}))
}
}
Self::IndexDescriptor(desc) => Ok(ColumnDescriptor::Time(desc.0).into()),
Self::IndexSelector(selector) => Ok(selector.0.into()),
Self::ComponentDescriptor(desc) => Ok(ColumnDescriptor::Component(desc.0).into()),
Self::ComponentSelector(selector) => Ok(selector.0.into()),
}
}
}

/// A type alias for any component-column-like object.
#[derive(FromPyObject)]
enum AnyComponentColumn {
#[pyo3(transparent, annotation = "name")]
Name(String),
#[pyo3(transparent, annotation = "component_descriptor")]
ComponentDescriptor(PyComponentColumnDescriptor),
#[pyo3(transparent, annotation = "component_selector")]
ComponentSelector(PyComponentColumnSelector),
}

impl AnyComponentColumn {
#[allow(dead_code)]
fn into_selector(self) -> ComponentColumnSelector {
fn into_selector(self) -> PyResult<ComponentColumnSelector> {
match self {
Self::ComponentDescriptor(desc) => desc.0.into(),
Self::ComponentSelector(selector) => selector.0,
Self::Name(name) => {
let component_path =
re_log_types::ComponentPath::from_str(&name).map_err(|err| {
PyValueError::new_err(format!("Invalid component path '{name}': {err}"))
})?;

Ok(ComponentColumnSelector {
entity_path: component_path.entity_path,
component_name: component_path.component_name.to_string(),
})
}
Self::ComponentDescriptor(desc) => Ok(desc.0.into()),
Self::ComponentSelector(selector) => Ok(selector.0),
}
}
}
Expand Down Expand Up @@ -576,7 +612,13 @@ impl PyRecordingView {

let columns = columns.or_else(|| if !args.is_empty() { Some(args) } else { None });

Ok(columns.map(|cols| cols.into_iter().map(|col| col.into_selector()).collect()))
columns
.map(|cols| {
cols.into_iter()
.map(|col| col.into_selector())
.collect::<PyResult<_>>()
})
.transpose()
}
}

Expand Down Expand Up @@ -1031,16 +1073,16 @@ impl PyRecordingView {
/// A new view containing only the data where the specified component column is not null.
///
/// The original view will not be modified.
fn filter_is_not_null(&self, column: AnyComponentColumn) -> Self {
fn filter_is_not_null(&self, column: AnyComponentColumn) -> PyResult<Self> {
let column = column.into_selector();

let mut query_expression = self.query_expression.clone();
query_expression.filtered_is_not_null = Some(column);
query_expression.filtered_is_not_null = Some(column?);

Self {
Ok(Self {
recording: self.recording.clone(),
query_expression,
}
})
}

#[allow(rustdoc::private_doc_tests)]
Expand Down
41 changes: 27 additions & 14 deletions rerun_py/tests/unit/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,36 @@ def test_content_filters(self) -> None:

def test_select_columns(self) -> None:
view = self.recording.view(index="my_index", contents="points")
index_col = rr.dataframe.IndexColumnSelector("my_index")

selectors = [rr.components.Position3D, "rerun.components.Position3D", "Position3D", "position3D"]
for selector in selectors:
pos = rr.dataframe.ComponentColumnSelector("points", selector)
index_col_selectors = [rr.dataframe.IndexColumnSelector("my_index"), "my_index"]

obj_selectors = [
rr.dataframe.ComponentColumnSelector("points", selector)
for selector in [
rr.components.Position3D,
"rerun.components.Position3D",
"Position3D",
"position3D",
]
]
str_selectors = [
"/points:rerun.components.Position3D",
"/points:Position3D",
"/points:position3d",
]

batches = view.select(index_col, pos)
for index_selector in index_col_selectors:
for col_selector in obj_selectors + str_selectors:
batches = view.select(index_selector, col_selector)

table = pa.Table.from_batches(batches, batches.schema)
# points
assert table.num_columns == 2
assert table.num_rows == 2
table = pa.Table.from_batches(batches, batches.schema)
# points
assert table.num_columns == 2
assert table.num_rows == 2

assert table.column("my_index")[0].equals(self.expected_index0[0])
assert table.column("my_index")[1].equals(self.expected_index1[0])
assert table.column("/points:Position3D")[0].values.equals(self.expected_pos0)
assert table.column("/points:Position3D")[1].values.equals(self.expected_pos1)
assert table.column("my_index")[0].equals(self.expected_index0[0])
assert table.column("my_index")[1].equals(self.expected_index1[0])
assert table.column("/points:Position3D")[0].values.equals(self.expected_pos0)
assert table.column("/points:Position3D")[1].values.equals(self.expected_pos1)

def test_index_values(self) -> None:
view = self.recording.view(index="my_index", contents="points")
Expand Down

0 comments on commit 8bb554b

Please sign in to comment.