Skip to content

Commit

Permalink
[apache#979] improve(doc): Align the configuration code with apache#926
Browse files Browse the repository at this point in the history
… doc changes (apache#989)

### What changes were proposed in this pull request?

This PR proposes code changes that align the configuration descriptions in the code with the doc changes made in apache#926.

### Why are the changes needed?

Without this change, there will be a discrepancy between doc and code.

Fix: apache#979 

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing UTs.
  • Loading branch information
jerryshao authored Dec 6, 2023
1 parent cbf36ea commit 0703cbc
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ public class HiveSchemaPropertiesMetadata extends BasePropertiesMetadata {
List<PropertyEntry<?>> propertyEntries =
ImmutableList.of(
stringOptionalPropertyEntry(
LOCATION, "The location for Hive database storage", false, null, false));
LOCATION,
"The directory for Hive database storage. Not required, HMS uses the value of "
+ "`hive.metastore.warehouse.dir` in the hive-site.xml by default",
false,
null,
false));

propertiesMetadata = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,23 @@ public String getSerde() {
booleanReservedPropertyEntry(
EXTERNAL, "Indicate whether it is an external table", false, true),
stringImmutablePropertyEntry(
LOCATION, "HDFS location for table storage", false, null, false, false),
LOCATION,
"The location for table storage. Not required, HMS will use the database location as the parent directory by default",
false,
null,
false,
false),
enumImmutablePropertyEntry(
TABLE_TYPE,
"The type of Hive table",
"Type of the table",
false,
TableType.class,
MANAGED_TABLE,
false,
false),
enumImmutablePropertyEntry(
FORMAT,
"The table storage format",
"The table file format",
false,
StorageFormat.class,
TEXTFILE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ public class IcebergConfig extends Config {

public static final ConfigEntry<String> CATALOG_BACKEND =
new ConfigBuilder(CATALOG_BACKEND_NAME)
.doc("Choose the implementation of the Iceberg catalog")
.doc("Catalog backend of Gravitino Iceberg catalog")
.version("0.2.0")
.stringConf()
.createWithDefault("memory");

public static final ConfigEntry<String> CATALOG_WAREHOUSE =
new ConfigBuilder(WAREHOUSE)
.doc("The warehouse config of the Iceberg catalog")
.doc("Warehouse directory of catalog")
.version("0.2.0")
.stringConf()
.createWithDefault(null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ public class IcebergTablePropertiesMetadata extends BasePropertiesMetadata {
static {
List<PropertyEntry<?>> propertyEntries =
ImmutableList.of(
stringReservedPropertyEntry(COMMENT, "Table comment", true),
stringReservedPropertyEntry(CREATOR, "Table creator info", false),
stringReservedPropertyEntry(COMMENT, "The table comment", true),
stringReservedPropertyEntry(CREATOR, "The table creator", false),
stringReservedPropertyEntry(LOCATION, "Iceberg location for table storage", false),
stringReservedPropertyEntry(
CURRENT_SNAPSHOT_ID,
"The snapshot representing the current state of the table",
"The snapshot represents the current state of the table",
false),
stringReservedPropertyEntry(
CHERRY_PICK_SNAPSHOT_ID,
"Selecting a specific snapshots in a merge operation",
"Selecting a specific snapshot in a merge operation",
false),
stringReservedPropertyEntry(
SORT_ORDER, "Selecting a specific snapshots in a merge operation", false),
SORT_ORDER, "Selecting a specific snapshot in a merge operation", false),
stringReservedPropertyEntry(
IDENTIFIER_FIELDS, "The identifier field(s) for defining the table", false));
PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName);
Expand Down
10 changes: 5 additions & 5 deletions core/src/main/java/com/datastrato/gravitino/Configs.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,21 @@ public interface Configs {

ConfigEntry<String> ENTITY_STORE =
new ConfigBuilder(ENTITY_STORE_KEY)
.doc("The entity store to use")
.doc("Which storage implementation to use")
.version("0.1.0")
.stringConf()
.createWithDefault(DEFAULT_ENTITY_STORE);

ConfigEntry<String> ENTITY_KV_STORE =
new ConfigBuilder(ENTITY_KV_STORE_KEY)
.doc("The kv entity store to use")
.doc("Detailed implementation of Kv storage")
.version("0.1.0")
.stringConf()
.createWithDefault(DEFAULT_ENTITY_KV_STORE);

ConfigEntry<String> ENTRY_KV_ROCKSDB_BACKEND_PATH =
new ConfigBuilder(ENTITY_KV_ROCKSDB_BACKEND_PATH_KEY)
.doc("The RocksDB backend path for entity store")
.doc("Directory path of `RocksDBKvBackend`")
.version("0.1.0")
.stringConf()
.createWithDefault(DEFAULT_KV_ROCKSDB_BACKEND_PATH);
Expand Down Expand Up @@ -76,15 +76,15 @@ public interface Configs {

ConfigEntry<Long> STORE_TRANSACTION_MAX_SKEW_TIME =
new ConfigBuilder("gravitino.entity.store.maxTransactionSkewTimeMs")
.doc("Max time skew allowed for transaction, Unit: millisecond")
.doc("The maximum skew time of transactions in milliseconds")
.version("0.3.0")
.longConf()
.createWithDefault(2000L);

ConfigEntry<Long> KV_DELETE_AFTER_TIME =
new ConfigBuilder(KV_DELETE_AFTER_TIME_KEY)
.doc(
"The max time that the deleted data and old version data will keep, unit: millisecond. At least 10 minutes and should not larger than 30 days ")
"The maximum time in milliseconds that the deleted data and old version data is kept")
.version("0.3.0")
.longConf()
.createWithDefault(DEFAULT_KV_DELETE_AFTER_TIME);
Expand Down
6 changes: 3 additions & 3 deletions docs/gravitino-manage-hive.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ Example JSON:

### Schema properties

| Property name | Description | example value | Since version |
|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------|---------------|
| `location` | Hive uses the value hive.metastore.warehouse.dir in the Hive configuration file hive-site.xml by default for the directory of Hive database storage. | `/user/hive/warehouse` | 0.1.0 |
| Property name | Description | example value | Since version |
|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------|---------------|
| `location` | The directory for Hive database storage. Not required, HMS uses the value of `hive.metastore.warehouse.dir` in the hive-site.xml by default. | `/user/hive/warehouse` | 0.1.0 |

## Creating an Apache Hive Table

Expand Down
15 changes: 8 additions & 7 deletions docs/gravitino-server-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,21 @@ The `gravitino.conf` file lists the configuration items in the following table.
| `gravitino.server.webserver.maxThreads` | The maximum number of threads in the thread pool used by Jetty webserver. `maxThreads` will be adjusted to 8 if the value is less than 8, and `maxThreads` must be greater than or equal to `minThreads`. | `Math.max(Runtime.getRuntime().availableProcessors() * 4, 400)` | 0.1.0 |
| `gravitino.server.webserver.threadPoolWorkQueueSize` | The size of the queue in the thread pool used by Jetty webserver. | `100` | 0.1.0 |
| `gravitino.server.webserver.stopTimeout` | Time in milliseconds to gracefully shut down the Jetty webserver. For more details, see `org.eclipse.jetty.server.Server#setStopTimeout`. | `30000` | 0.2.0 |
| `gravitino.server.webserver.idleTimeout` | The timeout in milliseconds of idle connections. ms. | `30000` | 0.2.0 |
| `gravitino.server.webserver.idleTimeout` | The timeout in milliseconds of idle connections. | `30000` | 0.2.0 |
| `gravitino.server.webserver.requestHeaderSize` | Maximum size of HTTP requests. | `131072` | 0.1.0 |
| `gravitino.server.webserver.responseHeaderSize` | Maximum size of HTTP responses. | `131072` | 0.1.0 |
| `gravitino.server.shutdown.timeout` | Time in milliseconds to gracefully shut down the Gravitino webserver. | `3000` | 0.2.0 |

### Storage configuration

| Configuration item | Description | Default value | Since version |
|---------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------|---------------|
| `gravitino.entity.store` | Which storage implementation to use, currently supported is key-value pair storage, the default value is `kv`. | `kv` | 0.1.0 |
| `gravitino.entity.store.kv` | Detailed implementation of Kv storage, currently supported is `RocksDB` storage implementation `RocksDBKvBackend`. | `RocksDBKvBackend` | 0.1.0 |
| Configuration item | Description | Default value | Since version |
|---------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------|---------------|
| `gravitino.entity.store` | The storage implementation to use. Currently, key-value pair storage is supported; the default value is `kv`. | `kv` | 0.1.0 |
| `gravitino.entity.store.kv` | The detailed implementation of KV storage. Currently, the `RocksDB` storage implementation `RocksDBKvBackend` is supported. | `RocksDBKvBackend` | 0.1.0 |
| `gravitino.entity.store.kv.rocksdbPath` | Directory path of `RocksDBKvBackend`. **It's highly recommended that you change this default value**, as it's under the deploy directory and future version upgrades may remove it. | `${GRAVITINO_HOME}/data/rocksdb` | 0.1.0 |
| `gravitino.entity.store.maxTransactionSkewTimeMs` | The maximum skew time of transactions in milliseconds. | `2000` | 0.3.0 |
| `gravitino.entity.store.kv.deleteAfterTimeMs` | The maximum time in milliseconds that the deleted data and old version data is kept. Set to at least 10 minutes and no longer than 30 days. | `604800000`(7 days) | 0.3.0 |
| `gravitino.entity.serde` | The serialization/deserialization class used to support entity storage, currently supported is `proto`. | `proto` | 0.1.0 |
| `gravitino.entity.store.maxTransactionSkewTimeMs` | The maximum skew time of transactions in milliseconds. | `2000` | 0.3.0 |
| `gravitino.entity.store.kv.deleteAfterTimeMs` | The maximum time in milliseconds that the deleted data and old version data is kept. Set to at least 10 minutes and no longer than 30 days. | `604800000`(7 days) | 0.3.0 |

### Catalog configuration

Expand Down
26 changes: 13 additions & 13 deletions docs/gravitino-use-iceberg.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ http://{GravitinoServerHost}:8090/api/metalakes/{Your_metalake_name}/catalogs

### Catalog configuration

| Configuration item | Description | value |
|--------------------|--------------------------------------------------|--------------------------------------------------------------------------------------------------------|
| `catalog-backend` | Catalog backend of Gravitino Iceberg | `hive` or `jdbc` |
| `uri` | Hive metadata address or JDBC connection address | `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/` or `jdbc:mysql://127.0.0.1:3306/test` |
| `warehouse` | Warehouse directory of Catalog | `/user/hive/warehouse-hive/` or `hdfs://namespace/hdfs/path` |
| Configuration item | Description | value |
|--------------------|-----------------------------------------------|--------------------------------------------------------------------------------------------------------|
| `catalog-backend` | Catalog backend of Gravitino Iceberg catalog. | `hive` or `jdbc` |
| `uri` | The uri config of the Iceberg catalog. | `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/` or `jdbc:mysql://127.0.0.1:3306/test` |
| `warehouse` | Warehouse directory of catalog. | `/user/hive/warehouse-hive/` or `hdfs://namespace/hdfs/path` |

### HDFS configuration

Expand Down Expand Up @@ -138,15 +138,15 @@ Iceberg lacks distribution support. For buckets, consider using partitions inste

Gravitino reserves the following fields, and you can't pass them in properties.

| Configuration item | Description |
|---------------------------|-----------------------------------------------------------|
| `comment` | The table comment. |
| `creator` | The table creator. |
| `location` | Iceberg location for table storage. |
| Configuration item | Description |
|---------------------------|---------------------------------------------------------|
| `comment` | The table comment. |
| `creator` | The table creator. |
| `location` | Iceberg location for table storage. |
| `current-snapshot-id` | The snapshot represents the current state of the table. |
| `cherry-pick-snapshot-id` | Selecting a specific snapshot in a merge operation. |
| `sort-order` | Selecting a specific snapshot in a merge operation. |
| `identifier-fields` | The identifier fields for defining the table. |
| `cherry-pick-snapshot-id` | Selecting a specific snapshot in a merge operation. |
| `sort-order` | The sort order of the table. |
| `identifier-fields` | The identifier field(s) for defining the table. |

Regarding Iceberg's properties, you can refer to [official documentation](https://iceberg.apache.org/docs/1.3.1/configuration/).

Expand Down
Loading

0 comments on commit 0703cbc

Please sign in to comment.