ci(java): introduce spotless-maven-plugin (#3193)
yanghua authored Dec 3, 2024
1 parent 3d3ebf2 commit c5a1382
Showing 62 changed files with 1,499 additions and 1,067 deletions.
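
Only two of the 62 changed files are reproduced in this excerpt — Dataset.java and DatasetFragment.java; the remaining files, including the build-file changes that actually register the plugin, are omitted. The visible edits are pure formatting: imports are regrouped so that java.* packages sit below third-party ones, long parameter lists are broken one argument per line, and short javadoc blocks are collapsed onto a single line — a style consistent with google-java-format. Spotless enforces this in CI through its check goal (mvn spotless:check), and contributors can fix violations locally with mvn spotless:apply.
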
135 changes: 91 additions & 44 deletions java/core/src/main/java/com/lancedb/lance/Dataset.java
@@ -16,12 +16,7 @@
import com.lancedb.lance.index.IndexType;
import com.lancedb.lance.ipc.LanceScanner;
import com.lancedb.lance.ipc.ScanOptions;
import java.io.Closeable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.arrow.c.ArrowArrayStream;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.Data;
@@ -30,6 +25,13 @@
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.types.pojo.Schema;

import java.io.Closeable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

/**
* Class representing a Lance dataset, interfacing with the native lance library. This class
* provides functionality to open and manage datasets with native code. The native library is loaded
@@ -59,17 +61,22 @@ private Dataset() {}
* @param params write params
* @return Dataset
*/
public static Dataset create(BufferAllocator allocator, String path, Schema schema,
WriteParams params) {
public static Dataset create(
BufferAllocator allocator, String path, Schema schema, WriteParams params) {
Preconditions.checkNotNull(allocator);
Preconditions.checkNotNull(path);
Preconditions.checkNotNull(schema);
Preconditions.checkNotNull(params);
try (ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator)) {
Data.exportSchema(allocator, schema, null, arrowSchema);
Dataset dataset =
createWithFfiSchema(arrowSchema.memoryAddress(), path, params.getMaxRowsPerFile(),
params.getMaxRowsPerGroup(), params.getMaxBytesPerFile(), params.getMode(),
createWithFfiSchema(
arrowSchema.memoryAddress(),
path,
params.getMaxRowsPerFile(),
params.getMaxRowsPerGroup(),
params.getMaxBytesPerFile(),
params.getMode(),
params.getStorageOptions());
dataset.allocator = allocator;
return dataset;
@@ -85,26 +92,42 @@ public static Dataset create(BufferAllocator allocator, String path, Schema sche
* @param params write parameters
* @return Dataset
*/
public static Dataset create(BufferAllocator allocator, ArrowArrayStream stream, String path,
WriteParams params) {
public static Dataset create(
BufferAllocator allocator, ArrowArrayStream stream, String path, WriteParams params) {
Preconditions.checkNotNull(allocator);
Preconditions.checkNotNull(stream);
Preconditions.checkNotNull(path);
Preconditions.checkNotNull(params);
Dataset dataset = createWithFfiStream(stream.memoryAddress(), path, params.getMaxRowsPerFile(),
params.getMaxRowsPerGroup(), params.getMaxBytesPerFile(), params.getMode(),
params.getStorageOptions());
Dataset dataset =
createWithFfiStream(
stream.memoryAddress(),
path,
params.getMaxRowsPerFile(),
params.getMaxRowsPerGroup(),
params.getMaxBytesPerFile(),
params.getMode(),
params.getStorageOptions());
dataset.allocator = allocator;
return dataset;
}

private static native Dataset createWithFfiSchema(long arrowSchemaMemoryAddress, String path,
Optional<Integer> maxRowsPerFile, Optional<Integer> maxRowsPerGroup,
Optional<Long> maxBytesPerFile, Optional<String> mode, Map<String, String> storageOptions);
private static native Dataset createWithFfiSchema(
long arrowSchemaMemoryAddress,
String path,
Optional<Integer> maxRowsPerFile,
Optional<Integer> maxRowsPerGroup,
Optional<Long> maxBytesPerFile,
Optional<String> mode,
Map<String, String> storageOptions);

private static native Dataset createWithFfiStream(long arrowStreamMemoryAddress, String path,
Optional<Integer> maxRowsPerFile, Optional<Integer> maxRowsPerGroup,
Optional<Long> maxBytesPerFile, Optional<String> mode, Map<String, String> storageOptions);
private static native Dataset createWithFfiStream(
long arrowStreamMemoryAddress,
String path,
Optional<Integer> maxRowsPerFile,
Optional<Integer> maxRowsPerGroup,
Optional<Long> maxBytesPerFile,
Optional<String> mode,
Map<String, String> storageOptions);
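
The reflowed create overloads above are functionally unchanged. A minimal usage sketch follows, built on stock Arrow APIs; the WriteParams.Builder and its withMaxRowsPerFile method are assumptions about the builder side of the API, since only the getters appear in this diff:

```java
import com.lancedb.lance.Dataset;
import com.lancedb.lance.WriteParams;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

import java.util.Arrays;

public class CreateDatasetExample {
  public static void main(String[] args) {
    // Two-column schema built with stock Arrow APIs.
    Schema schema =
        new Schema(
            Arrays.asList(
                Field.nullable("id", new ArrowType.Int(32, true)),
                Field.nullable("name", new ArrowType.Utf8())));
    try (BufferAllocator allocator = new RootAllocator()) {
      // WriteParams.Builder and withMaxRowsPerFile are assumptions about the
      // builder API; only the getters are visible in this diff.
      WriteParams params = new WriteParams.Builder().withMaxRowsPerFile(4096).build();
      try (Dataset dataset = Dataset.create(allocator, "/tmp/demo.lance", schema, params)) {
        System.out.println("created dataset with " + dataset.countRows() + " rows");
      }
    }
  }
}
```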

/**
* Open a dataset from the specified path.
@@ -157,20 +180,30 @@ public static Dataset open(BufferAllocator allocator, String path, ReadOptions o
* @param options the open options
* @return Dataset
*/
private static Dataset open(BufferAllocator allocator, boolean selfManagedAllocator, String path,
ReadOptions options) {
private static Dataset open(
BufferAllocator allocator, boolean selfManagedAllocator, String path, ReadOptions options) {
Preconditions.checkNotNull(path);
Preconditions.checkNotNull(allocator);
Preconditions.checkNotNull(options);
Dataset dataset = openNative(path, options.getVersion(), options.getBlockSize(),
options.getIndexCacheSize(), options.getMetadataCacheSize(), options.getStorageOptions());
Dataset dataset =
openNative(
path,
options.getVersion(),
options.getBlockSize(),
options.getIndexCacheSize(),
options.getMetadataCacheSize(),
options.getStorageOptions());
dataset.allocator = allocator;
dataset.selfManagedAllocator = selfManagedAllocator;
return dataset;
}

private static native Dataset openNative(String path, Optional<Integer> version,
Optional<Integer> blockSize, int indexCacheSize, int metadataCacheSize,
private static native Dataset openNative(
String path,
Optional<Integer> version,
Optional<Integer> blockSize,
int indexCacheSize,
int metadataCacheSize,
Map<String, String> storageOptions);
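
A companion sketch for the open path. ReadOptions.Builder, its setter name, and the import path are assumptions here — the diff only shows the getters consumed by openNative:

```java
import com.lancedb.lance.Dataset;
import com.lancedb.lance.ReadOptions;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

public class OpenDatasetExample {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator()) {
      // Hypothetical builder call; pinning a version enables time-travel reads.
      ReadOptions options = new ReadOptions.Builder().setVersion(1).build();
      try (Dataset dataset = Dataset.open(allocator, "/tmp/demo.lance", options)) {
        System.out.println(
            "opened version " + dataset.version() + ", latest is " + dataset.latestVersion());
      }
    }
  }
}
```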

/**
@@ -180,16 +213,23 @@ private static native Dataset openNative(String path, Optional<Integer> version,
* @param path The file path of the dataset to open.
* @param operation The operation to apply to the dataset.
* @param readVersion The version of the dataset that was used as the base for the changes. This
* is not needed for overwrite or restore operations.
* is not needed for overwrite or restore operations.
* @return A new instance of {@link Dataset} linked to the opened dataset.
*/
public static Dataset commit(BufferAllocator allocator, String path, FragmentOperation operation,
Optional<Long> readVersion) {
public static Dataset commit(
BufferAllocator allocator,
String path,
FragmentOperation operation,
Optional<Long> readVersion) {
return commit(allocator, path, operation, readVersion, new HashMap<>());
}

public static Dataset commit(BufferAllocator allocator, String path, FragmentOperation operation,
Optional<Long> readVersion, Map<String, String> storageOptions) {
public static Dataset commit(
BufferAllocator allocator,
String path,
FragmentOperation operation,
Optional<Long> readVersion,
Map<String, String> storageOptions) {
Preconditions.checkNotNull(allocator);
Preconditions.checkNotNull(path);
Preconditions.checkNotNull(operation);
@@ -199,8 +239,11 @@ public static Dataset commit(BufferAllocator allocator, String path, FragmentOpe
return dataset;
}

public static native Dataset commitAppend(String path, Optional<Long> readVersion,
List<String> fragmentsMetadata, Map<String, String> storageOptions);
public static native Dataset commitAppend(
String path,
Optional<Long> readVersion,
List<String> fragmentsMetadata,
Map<String, String> storageOptions);
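
The commit overloads above pair with a fragment operation. A hedged sketch of an append commit — FragmentOperation.Append and its constructor are assumptions; only Dataset.commit's signature is shown in this diff:

```java
import com.lancedb.lance.Dataset;
import com.lancedb.lance.FragmentMetadata;
import com.lancedb.lance.FragmentOperation;

import org.apache.arrow.memory.BufferAllocator;

import java.util.List;
import java.util.Optional;

public class CommitExample {
  // Commits fragments written out of band (e.g. by fragment-level writers)
  // against the dataset version they were based on.
  static Dataset appendFragments(
      BufferAllocator allocator, String path, List<FragmentMetadata> fragments, long readVersion) {
    // FragmentOperation.Append taking the fragment metadata list is an assumption.
    FragmentOperation.Append append = new FragmentOperation.Append(fragments);
    return Dataset.commit(allocator, path, append, Optional.of(readVersion));
  }
}
```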

/**
* Create a new Dataset Scanner.
@@ -249,9 +292,7 @@ public long version() {

private native long nativeVersion();

/**
* @return the latest version of the dataset.
*/
/** @return the latest version of the dataset. */
public long latestVersion() {
try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
@@ -270,16 +311,24 @@ public long latestVersion() {
* @param params index params
* @param replace whether to replace the existing index
*/
public void createIndex(List<String> columns, IndexType indexType, Optional<String> name,
IndexParams params, boolean replace) {
public void createIndex(
List<String> columns,
IndexType indexType,
Optional<String> name,
IndexParams params,
boolean replace) {
try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
nativeCreateIndex(columns, indexType.getValue(), name, params, replace);
}
}

private native void nativeCreateIndex(List<String> columns, int indexTypeCode,
Optional<String> name, IndexParams params, boolean replace);
private native void nativeCreateIndex(
List<String> columns,
int indexTypeCode,
Optional<String> name,
IndexParams params,
boolean replace);
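
The reflowed createIndex signature reads naturally as a builder-style call site. A sketch, with the caveat that IndexType.VECTOR, the IndexParams builder, and the import paths are assumptions not confirmed by this excerpt:

```java
import com.lancedb.lance.Dataset;
import com.lancedb.lance.index.IndexParams;
import com.lancedb.lance.index.IndexType;

import java.util.Collections;
import java.util.Optional;

public class IndexExample {
  static void buildVectorIndex(Dataset dataset) {
    dataset.createIndex(
        Collections.singletonList("embedding"), // column(s) to index; name is illustrative
        IndexType.VECTOR,                        // assumed enum constant
        Optional.empty(),                        // let Lance derive the index name
        new IndexParams.Builder().build(),       // assumed builder with default params
        /* replace= */ true);
  }
}
```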

/**
* Count the number of rows in the dataset.
@@ -332,9 +381,7 @@ public Schema getSchema() {

private native void importFfiSchema(long arrowSchemaMemoryAddress);

/**
* @return all the created indexes names
*/
/** @return all the created indexes names */
public List<String> listIndexes() {
try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
30 changes: 17 additions & 13 deletions java/core/src/main/java/com/lancedb/lance/DatasetFragment.java
@@ -16,16 +16,16 @@

import com.lancedb.lance.ipc.LanceScanner;
import com.lancedb.lance.ipc.ScanOptions;
import java.util.Arrays;

import org.apache.arrow.util.Preconditions;

/**
* Dataset format.
* Matching to Lance Rust FileFragment.
*/
import java.util.Arrays;

/** Dataset format. Matching to Lance Rust FileFragment. */
public class DatasetFragment {
/** Pointer to the {@link Dataset} instance in Java. */
private final Dataset dataset;

private final FragmentMetadata metadata;

/** Private constructor, calling from JNI. */
@@ -42,8 +42,10 @@ public class DatasetFragment {
* @return a dataset scanner
*/
public LanceScanner newScan() {
return LanceScanner.create(dataset, new ScanOptions.Builder()
.fragmentIds(Arrays.asList(metadata.getId())).build(), dataset.allocator);
return LanceScanner.create(
dataset,
new ScanOptions.Builder().fragmentIds(Arrays.asList(metadata.getId())).build(),
dataset.allocator);
}

/**
@@ -53,9 +55,12 @@ public LanceScanner newScan() {
* @return a dataset scanner
*/
public LanceScanner newScan(long batchSize) {
return LanceScanner.create(dataset,
return LanceScanner.create(
dataset,
new ScanOptions.Builder()
.fragmentIds(Arrays.asList(metadata.getId())).batchSize(batchSize).build(),
.fragmentIds(Arrays.asList(metadata.getId()))
.batchSize(batchSize)
.build(),
dataset.allocator);
}

@@ -67,7 +72,8 @@ public LanceScanner newScan(long batchSize) {
*/
public LanceScanner newScan(ScanOptions options) {
Preconditions.checkNotNull(options);
return LanceScanner.create(dataset,
return LanceScanner.create(
dataset,
new ScanOptions.Builder(options).fragmentIds(Arrays.asList(metadata.getId())).build(),
dataset.allocator);
}
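
The fragment-scoped scan methods above were only reflowed, but they sketch the read path well. A short usage example — the 1024 batch size is arbitrary, and LanceScanner is assumed to be closeable like other Arrow scanners:

```java
import com.lancedb.lance.DatasetFragment;
import com.lancedb.lance.ipc.LanceScanner;

public class FragmentScanExample {
  static void scanFragment(DatasetFragment fragment) throws Exception {
    System.out.println(
        "fragment " + fragment.getId() + " has " + fragment.countRows() + " rows");
    try (LanceScanner scanner = fragment.newScan(1024)) {
      // Consume Arrow record batches from `scanner` here; the batch-reading
      // API is not shown in this excerpt.
    }
  }
}
```
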
@@ -78,9 +84,7 @@ public int getId() {
return metadata.getId();
}

/**
* @return row counts in this Fragment
*/
/** @return row counts in this Fragment */
public int countRows() {
return countRowsNative(dataset, metadata.getId());
}