Skip to content

Commit

Permalink
fix!: correctly handle nulls in btree and bitmap indices (#3211)
Browse files Browse the repository at this point in the history
There were a few issues with our null handling in scalar indices.

First, it appears I assumed earlier that `X < NULL` and `X > NULL` would
always be false. However, in `arrow-rs` the ordering considers `NULL` to
be "the smallest value" and so `X < NULL` always evaluated to true. This
required some changes to the logic in the btree and bitmap indices.

Second, the btree index was still using the v1 file format because it
relied on the page size to keep track of the index's batch size. I've
instead made the batch size a configurable property (configurable in
code, not configurable by users) and made it so that btree can use the
v2 file format.

Finally, related to the above, I changed it so we now write v2 files for
all scalar indices, even if the dataset is a v1 dataset. I think that's
a reasonable decision at this point.

The logic to fall back and read the old v1 files was already in place (I
believe @BubbleCal added it back when working on inverted index) but I
added a migration test just to be sure we weren't breaking our btree /
bitmap support.

Users with existing bitmap indices will get the new correct behavior
without any changes.
Users with existing btree indices will get some of the new correct
behavior but will need to retrain their indices to get all of the
correct behavior.

BREAKING CHANGE: Bitmap and btree indices will no longer be readable by
older versions of Lance. This is not a "backwards compatibility change"
(no APIs or code will stop working) but rather a "forwards compatibility
change" (you need to be careful in a multi-version deployment or if you
roll back).
  • Loading branch information
westonpace authored Dec 6, 2024
1 parent f21397d commit 1e349cd
Show file tree
Hide file tree
Showing 28 changed files with 291 additions and 189 deletions.
32 changes: 16 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 17 additions & 17 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ exclude = ["python"]
resolver = "2"

[workspace.package]
version = "0.20.1"
version = "0.21.0"
edition = "2021"
authors = ["Lance Devs <[email protected]>"]
license = "Apache-2.0"
Expand All @@ -44,21 +44,21 @@ categories = [
rust-version = "1.78"

[workspace.dependencies]
lance = { version = "=0.20.1", path = "./rust/lance" }
lance-arrow = { version = "=0.20.1", path = "./rust/lance-arrow" }
lance-core = { version = "=0.20.1", path = "./rust/lance-core" }
lance-datafusion = { version = "=0.20.1", path = "./rust/lance-datafusion" }
lance-datagen = { version = "=0.20.1", path = "./rust/lance-datagen" }
lance-encoding = { version = "=0.20.1", path = "./rust/lance-encoding" }
lance-encoding-datafusion = { version = "=0.20.1", path = "./rust/lance-encoding-datafusion" }
lance-file = { version = "=0.20.1", path = "./rust/lance-file" }
lance-index = { version = "=0.20.1", path = "./rust/lance-index" }
lance-io = { version = "=0.20.1", path = "./rust/lance-io" }
lance-jni = { version = "=0.20.1", path = "./java/core/lance-jni" }
lance-linalg = { version = "=0.20.1", path = "./rust/lance-linalg" }
lance-table = { version = "=0.20.1", path = "./rust/lance-table" }
lance-test-macros = { version = "=0.20.1", path = "./rust/lance-test-macros" }
lance-testing = { version = "=0.20.1", path = "./rust/lance-testing" }
lance = { version = "=0.21.0", path = "./rust/lance" }
lance-arrow = { version = "=0.21.0", path = "./rust/lance-arrow" }
lance-core = { version = "=0.21.0", path = "./rust/lance-core" }
lance-datafusion = { version = "=0.21.0", path = "./rust/lance-datafusion" }
lance-datagen = { version = "=0.21.0", path = "./rust/lance-datagen" }
lance-encoding = { version = "=0.21.0", path = "./rust/lance-encoding" }
lance-encoding-datafusion = { version = "=0.21.0", path = "./rust/lance-encoding-datafusion" }
lance-file = { version = "=0.21.0", path = "./rust/lance-file" }
lance-index = { version = "=0.21.0", path = "./rust/lance-index" }
lance-io = { version = "=0.21.0", path = "./rust/lance-io" }
lance-jni = { version = "=0.21.0", path = "./java/core/lance-jni" }
lance-linalg = { version = "=0.21.0", path = "./rust/lance-linalg" }
lance-table = { version = "=0.21.0", path = "./rust/lance-table" }
lance-test-macros = { version = "=0.21.0", path = "./rust/lance-test-macros" }
lance-testing = { version = "=0.21.0", path = "./rust/lance-testing" }
approx = "0.5.1"
# Note that this one does not include pyarrow
arrow = { version = "53.2", optional = false, features = ["prettyprint"] }
Expand Down Expand Up @@ -111,7 +111,7 @@ datafusion-physical-expr = { version = "42.0", features = [
] }
deepsize = "0.2.0"
either = "1.0"
fsst = { version = "=0.20.1", path = "./rust/lance-encoding/src/compression_algo/fsst" }
fsst = { version = "=0.21.0", path = "./rust/lance-encoding/src/compression_algo/fsst" }
futures = "0.3"
http = "1.1.0"
hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
Expand Down
2 changes: 1 addition & 1 deletion java/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lance-parent</artifactId>
<version>0.20.1</version>
<version>0.21.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.lancedb</groupId>
<artifactId>lance-parent</artifactId>
<version>0.20.1</version>
<version>0.21.0</version>
<packaging>pom</packaging>

<name>Lance Parent</name>
Expand Down
4 changes: 2 additions & 2 deletions java/spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lance-parent</artifactId>
<version>0.20.1</version>
<version>0.21.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down Expand Up @@ -82,7 +82,7 @@
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-core</artifactId>
<version>0.20.1</version>
<version>0.21.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
Expand Down
34 changes: 17 additions & 17 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pylance"
version = "0.20.1"
version = "0.21.0"
edition = "2021"
authors = ["Lance Devs <[email protected]>"]
rust-version = "1.65"
Expand Down
Loading

0 comments on commit 1e349cd

Please sign in to comment.