// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors
syntax = "proto3";
package lance.table;
import "google/protobuf/any.proto";
import "google/protobuf/timestamp.proto";
import "file.proto";
/*
Format:
+----------------------------------------+
| Encoded Column 0, Chunk 0              |
   ...
| Encoded Column M, Chunk N - 1          |
| Encoded Column M, Chunk N              |
| Indices ...                            |
| Chunk Position (M x N x 8)             |
| Manifest (Optional)                    |
| Metadata                               |
| i64: metadata position                 |
| MAJOR_VERSION | MINOR_VERSION | "LANC" |
+----------------------------------------+
*/
/// UUID type, encoded as 16 bytes.
message UUID {
bytes uuid = 1;
}
// Manifest is a global section shared between all the files.
message Manifest {
// All fields of the dataset, including the nested fields.
repeated lance.file.Field fields = 1;
// Fragments of the dataset.
repeated DataFragment fragments = 2;
// Snapshot version number.
uint64 version = 3;
// The file position of the version auxiliary data.
// * It is not inheritable between versions.
// * It is not loaded by default during query.
uint64 version_aux_data = 4;
// Schema metadata.
map<string, bytes> metadata = 5;
message WriterVersion {
// The name of the library that created this file.
string library = 1;
// The version of the library that created this file. Because we cannot assume
// that the library is semantically versioned, this is a string. However, if it
// is semantically versioned, it should be a valid semver string without any 'v'
// prefix. For example: `2.0.0`, `2.0.0-rc.1`.
string version = 2;
}
// The version of the writer that created this file.
//
// This information may be used to detect whether the file may have known bugs
// associated with that writer.
WriterVersion writer_version = 13;
// If present, the file position of the index metadata.
optional uint64 index_section = 6;
// Version creation timestamp, UTC timezone.
google.protobuf.Timestamp timestamp = 7;
// Optional version tag
string tag = 8;
// Feature flags for readers.
//
// A bitmap of flags that indicate which features are required to be able to
// read the table. If a reader does not recognize a flag that is set, it
// should not attempt to read the dataset.
//
// Known flags:
// * 1: deletion files are present
// * 2: move_stable_row_ids: row IDs are tracked and stable after move operations
// (such as compaction), but not updates.
// * 4: use v2 format (deprecated)
// * 8: table config is present
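//
// As an illustrative (hypothetical) example: a table that uses deletion files
// and move-stable row IDs would set reader_feature_flags = 1 | 2 = 3.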
uint64 reader_feature_flags = 9;
// Feature flags for writers.
//
// A bitmap of flags that indicate which features are required to be able to
// write to the dataset. If a writer does not recognize a flag that is set, it
// should not attempt to write to the dataset.
//
// The flags are the same as for reader_feature_flags, although they will not
// always apply to both.
uint64 writer_feature_flags = 10;
// The highest fragment ID that has been used so far.
//
// This ID is not guaranteed to be present in the current version, but it may
// have been used in previous versions.
//
// For a single file, this will be zero.
uint32 max_fragment_id = 11;
// Path to the transaction file, relative to `{root}/_transactions`
//
// This contains a serialized Transaction message representing the transaction
// that created this version.
//
// May be empty if no transaction file was written.
//
// The path format is "{read_version}-{uuid}.txn" where {read_version} is the
// version of the table the transaction read from, and {uuid} is a
// hyphen-separated UUID.
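//
// A hypothetical example: a transaction that read version 5 of the table might
// be recorded as "5-870a53aa-83c7-4d9e-9b20-4f1e35a0c3b1.txn".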
string transaction_file = 12;
// The next unused row id. If zero, then the table does not have any rows.
//
// This is only used if the "move_stable_row_ids" feature flag is set.
uint64 next_row_id = 14;
message DataStorageFormat {
// The format of the data files (e.g. "lance")
string file_format = 1;
// The max format version of the data files.
//
// This is the maximum version of the file format that the dataset will create.
// This may be lower than the maximum version that can be written in order to allow
// older readers to read the dataset.
string version = 2;
}
// The data storage format
//
// This specifies what format is used to store the data files.
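//
// For example (hypothetical values), a dataset written with Lance v2.0 data
// files might carry file_format = "lance" and version = "2.0".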
DataStorageFormat data_format = 15;
// Table config.
//
// Keys with the prefix "lance." are reserved for the Lance library. Other
// libraries may wish to similarly prefix their configuration keys
// appropriately.
map<string, string> config = 16;
// The version of the blob dataset associated with this table. Changes to
// blob fields will modify the blob dataset and update this version in the parent
// table.
//
// If this value is 0 then there are no blob fields.
uint64 blob_dataset_version = 17;
} // Manifest
// Auxiliary Data attached to a version.
// Only loaded on demand.
message VersionAuxData {
// key-value metadata.
map<string, bytes> metadata = 3;
}
// Metadata describing the index.
message IndexMetadata {
// Unique ID of an index. It is unique across all the dataset versions.
UUID uuid = 1;
// The field IDs of the columns the index is built on.
repeated int32 fields = 2;
// Index name. Must be unique within one dataset version.
string name = 3;
// The version of the dataset this index was built from.
uint64 dataset_version = 4;
/// A bitmap of the included fragment ids.
///
/// This may be used to determine how much of the dataset is covered by the
/// index. This information can be retrieved from the dataset by looking at
/// the dataset at `dataset_version`. However, since the old version may be
/// deleted while the index is still in use, this information is also stored
/// in the index.
///
/// The bitmap is stored as a 32-bit Roaring bitmap.
bytes fragment_bitmap = 5;
/// Details, specific to the index type, which are needed to load / interpret the index
///
/// Indices should avoid putting large amounts of information in this field, as it will
/// bloat the manifest.
google.protobuf.Any index_details = 6;
}
// Index Section, containing a list of index metadata for one dataset version.
message IndexSection {
repeated IndexMetadata indices = 1;
}
// Data fragment. A fragment is a set of files which represent the
// different columns of the same rows.
// If a column exists in the schema but the corresponding data file does not,
// the column is treated as all nulls.
message DataFragment {
// Unique ID of each DataFragment
uint64 id = 1;
repeated DataFile files = 2;
// File that indicates which rows, if any, should be considered deleted.
DeletionFile deletion_file = 3;
// TODO: What's the simplest way we can allow an inline tombstone bitmap?
// A serialized RowIdSequence message (see rowids.proto).
//
// These are the row ids for the fragment, in order of the rows as they appear.
// That is, if a fragment has 3 rows and the row ids are [1, 42, 3], then the
// first row has row id 1, the second has row id 42, and the third has row id 3.
oneof row_id_sequence {
// If small (< 200KB), the row ids are stored inline.
bytes inline_row_ids = 5;
// Otherwise, stored as part of a file.
ExternalFile external_row_ids = 6;
} // row_id_sequence
// Number of original rows in the fragment. This includes rows that are
// now marked with deletion tombstones. To compute the current number of rows,
// subtract `deletion_file.num_deleted_rows` from this value.
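//
// For example, a fragment with physical_rows = 100 and a deletion file with
// num_deleted_rows = 10 currently holds 90 live rows.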
uint64 physical_rows = 4;
}
// Lance Data File
message DataFile {
// Relative path to the root.
string path = 1;
// The ids of the fields/columns in this file.
//
// -1 is used for "unassigned" while in memory. It is not meant to be written
// to disk. -2 is used for "tombstoned", meaning a field that is no longer
// in use. This is often because the original field id was reassigned to a
// different data file.
//
// In Lance v1 IDs are assigned based on position in the file, offset by the max
// existing field id in the table (if any exist already). So when a fragment is first
// created with one file of N columns, the field ids will be 1, 2, ..., N. If a
// second fragment is created with M columns, the field ids will be N+1, N+2,
// ..., N+M.
//
// In Lance v1 there is one field for each field in the input schema; this includes
// nested fields (both struct and list). Fixed size list fields have only a single
// field id (these are not considered nested fields in Lance v1).
//
// This allows column indices to be calculated from field IDs and the input schema.
//
// In Lance v2 the field IDs generally follow the same pattern but there is no
// way to calculate the column index from the field ID. This is because a given
// field could be encoded in many different ways, some of which occupy a different
// number of columns. For example, a struct field could be encoded into N + 1 columns
// or it could be encoded into a single packed column. To determine column indices
// the column_indices property should be used instead.
//
// In Lance v1 these ids must be sorted but might not always be contiguous.
repeated int32 fields = 2;
// The top-level column indices for each field in the file.
//
// If the data file is version 1, then this property will be empty.
//
// Otherwise there must be one entry for each field in `fields`.
//
// Some fields may not correspond to a top-level column in the file. In these cases
// the index will be -1.
//
// For example, consider the schema:
//
// - dimension: packed-struct (0):
// - x: u32 (1)
// - y: u32 (2)
// - path: list<u32> (3)
// - embedding: fsl<768> (4)
// - fp64
// - borders: fsl<4> (5)
// - simple-struct (6)
// - margin: fp64 (7)
// - padding: fp64 (8)
//
// One possible column indices array could be:
// [0, -1, -1, 1, 3, 4, 5, 6, 7]
//
// This reflects quite a few phenomena:
// - The packed struct is encoded into a single column and there is no top-level column
// for the x or y fields
// - The variable sized list is encoded into two columns
// - The embedding is encoded into a single column (common for FSL of primitive) and there
// is no "FSL column"
// - The borders field actually does have an "FSL column"
//
// The column indices array may not contain duplicates (other than -1).
repeated int32 column_indices = 3;
// The major file version used to create the file
uint32 file_major_version = 4;
// The minor file version used to create the file
//
// If both `file_major_version` and `file_minor_version` are set to 0,
// then this is a version 0.1 or version 0.2 file.
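//
// As a hypothetical illustration, a file written with format version 2.1 would
// have file_major_version = 2 and file_minor_version = 1.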
uint32 file_minor_version = 5;
} // DataFile
// Deletion File
//
// The path of the deletion file is constructed as:
// {root}/_deletions/{fragment_id}-{read_version}-{id}.{extension}
// where {extension} is `.arrow` or `.bin` depending on the type of deletion.
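//
// A hypothetical example: an Arrow-array deletion file for fragment 12, written
// against read version 5 with id 987, would be stored at
// {root}/_deletions/12-5-987.arrow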
message DeletionFile {
// Type of deletion file, which varies depending on what is the most efficient
// way to store the deleted row offsets. If there are no deleted rows, this will be
// unspecified. If rows are sparsely deleted, then ARROW_ARRAY is the most efficient.
// If rows are densely deleted, then BITMAP is the most efficient.
enum DeletionFileType {
// Deletion file is a single Int32Array of deleted row offsets. This is stored as
// an Arrow IPC file with one batch and one column. Has a .arrow extension.
ARROW_ARRAY = 0;
// Deletion file is a Roaring Bitmap of deleted row offsets. Has a .bin extension.
BITMAP = 1;
}
// Type of deletion file. If it is unspecified, then the remaining fields will be missing.
DeletionFileType file_type = 1;
// The version of the dataset this deletion file was built from.
uint64 read_version = 2;
// An opaque id used to differentiate this file from others written by concurrent
// writers.
uint64 id = 3;
// The number of rows that are marked as deleted.
uint64 num_deleted_rows = 4;
} // DeletionFile
message ExternalFile {
// Path to the file, relative to the root of the table.
string path = 1;
// The offset in the file where the data starts.
uint64 offset = 2;
// The size of the data in the file.
uint64 size = 3;
}
/// The following messages are used for the index_details field in IndexMetadata.
///
/// This is not an exhaustive set of index types and just lists the index types supported
/// by a base distribution of Lance.
// Currently these are all empty messages because all needed details are either hard-coded (e.g.
// filenames) or stored in the index itself. However, we may want to add more details in the
// future, in particular we can add details that may be useful for planning queries (e.g. don't
// force us to load the index until we know we need it)
message BTreeIndexDetails {}
message BitmapIndexDetails {}
message LabelListIndexDetails {}
message InvertedIndexDetails {}
message VectorIndexDetails {}