Dataframe v2: support for using_index_values (rerun-io#7616)

Implements support for `using_index_values`. I've changed the definition to a `BTreeSet` instead of a `Vec`, so that this is consistent with the rest of the API, i.e. the results are sorted and unique. Otherwise, things get really ugly, really fast.
TeoNikolov · Oct 8, 2024 · 49395aa · 49395aa
1 parent ec30916
commit 49395aa
Show file tree

Hide file tree

Showing 4 changed files with 237 additions and 86 deletions.
diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs
@@ -733,7 +733,7 @@ pub struct QueryExpression2 {
     /// Only rows where at least 1 of the view-contents contains non-null data within that range will be kept in
     /// the final dataset.
     ///
-    /// This is ignored if [`QueryExpression2::sampled_index_values`] is set.
+    /// This is ignored if [`QueryExpression2::using_index_values`] is set.
     ///
     /// Example: `ResolvedTimeRange(10, 20)`.
     pub filtered_index_range: Option<IndexRange>,
@@ -743,27 +743,25 @@ pub struct QueryExpression2 {
     /// Only rows where at least 1 column contains non-null data at these specific values will be kept
     /// in the final dataset.
     ///
-    /// This is ignored if [`QueryExpression2::sampled_index_values`] is set.
+    /// This is ignored if [`QueryExpression2::using_index_values`] is set.
     ///
     /// Example: `[TimeInt(12), TimeInt(14)]`.
     pub filtered_index_values: Option<BTreeSet<IndexValue>>,
 
-    /// TODO(cmc): NOT IMPLEMENTED.
-    ///
     /// The specific index values used to sample _rows_ from the view contents.
     ///
     /// The final dataset will contain one row per sampled index value, regardless of whether data
     /// existed for that index value in the view contents.
+    /// The semantics of the query are consistent with all other settings: the results will be
+    /// sorted on the `filtered_index`, and only contain unique index values.
     ///
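     /// Since the values are stored in a `BTreeSet`, the rows of the final result follow the sorted
     /// order of the index values, not the order in which the values were supplied.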
     ///
-    /// If [`QueryExpression2::sampled_index_values`] is set, it overrides both [`QueryExpression2::filtered_index_range`]
+    /// If [`QueryExpression2::using_index_values`] is set, it overrides both [`QueryExpression2::filtered_index_range`]
     /// and [`QueryExpression2::filtered_index_values`].
     ///
     /// Example: `[TimeInt(12), TimeInt(14)]`.
-    //
-    // TODO(jleibs): We need an alternative name for sampled.
-    pub sampled_index_values: Option<Vec<IndexValue>>,
+    pub using_index_values: Option<BTreeSet<IndexValue>>,
 
     /// The component column used to filter out _rows_ from the view contents.
     ///
@@ -774,8 +772,6 @@ pub struct QueryExpression2 {
     // TODO(cmc): multi-pov support
     pub filtered_point_of_view: Option<ComponentColumnSelector>,
 
-    /// TODO(cmc): NOT IMPLEMENTED.
-    ///
     /// Specifies how null values should be filled in the returned dataframe.
     ///
     /// Defaults to [`SparseFillStrategy::None`].
@@ -803,7 +799,7 @@ impl QueryExpression2 {
             filtered_index: index,
             filtered_index_range: None,
             filtered_index_values: None,
-            sampled_index_values: None,
+            using_index_values: None,
             filtered_point_of_view: None,
             sparse_fill_strategy: SparseFillStrategy::None,
             selection: None,