Feat: Add Kafka integration for Parseable server #936 (#1047)

Status: Open

hippalus wants to merge 39 commits into base: main

Changes shown from 1 commit (of 39 commits)
4cc28cd
Feat: Add Kafka integration for Parseable server #936 .
hippalus Dec 21, 2024
06364b5
Merge remote-tracking branch 'origin' into kafka-integration
hippalus Dec 21, 2024
f085a79
fix conflicts
hippalus Dec 21, 2024
d32eae5
update cli.rs
hippalus Dec 21, 2024
693b9c9
remove unused fn from metadata.rs
hippalus Dec 21, 2024
3cc6b0e
add Copyright
hippalus Dec 21, 2024
12f0358
fix deepsource issues
hippalus Dec 21, 2024
0f6ca53
fix deepsource issues on shutdown.rs
hippalus Dec 21, 2024
58cc468
Add .idea to .gitignore
hippalus Dec 21, 2024
aff48a2
Add Kafka cluster setup to docker-compose files and refactor Dockerfi…
hippalus Dec 22, 2024
9ce1031
feat(metrics): add KafkaMetricsCollector for Prometheus integration .
hippalus Dec 23, 2024
6719a0e
Merge branch 'main' into kafka-integration
hippalus Dec 23, 2024
d8d0558
fix kafka metrics collector registration
hippalus Dec 24, 2024
6ad0805
Merge branch 'main' into kafka-integration
hippalus Dec 26, 2024
4d13ee2
Refactor connector configurations to adapt parseable cli options.
hippalus Dec 26, 2024
d26d4de
Refactor metrics.rs to reduce cyclomatic complexity.
hippalus Dec 26, 2024
7604bc5
Refactor chunk size configuration
hippalus Dec 26, 2024
3a0fbb0
use comment instead of todo! macro
hippalus Dec 26, 2024
7f94f3a
add license header
hippalus Dec 26, 2024
bfb4071
cargo update
hippalus Dec 26, 2024
73a8659
add resource limits for docker containers
hippalus Dec 26, 2024
9afc8d9
Merge branch 'main' into kafka-integration
hippalus Dec 27, 2024
bb3b5cb
scale down kafka broker to single node since OOM on integration test …
hippalus Dec 27, 2024
2df4727
add Install dependencies step to coverage.yaml
hippalus Dec 27, 2024
839bef8
improve logging and err handling
hippalus Dec 28, 2024
935fc40
change log rate
hippalus Dec 28, 2024
a44582d
comment out kafka-ui in docker-compose
hippalus Dec 28, 2024
8937c4d
refactor py script
hippalus Dec 28, 2024
0064727
refactor py script
hippalus Dec 28, 2024
5355634
update dist-test with LOG_RATE=500 TOTAL_LOGS=50000
hippalus Dec 28, 2024
32c17bd
update dist-test with topic REPLICATION_FACTOR=3
hippalus Dec 28, 2024
cc236d0
Separate kafka and standard dockerfiles. Add conditional compilation …
hippalus Dec 30, 2024
7be0ca8
Merge branch 'main' into kafka-integration
hippalus Dec 31, 2024
aade3a8
fix rust fmt
hippalus Dec 31, 2024
9ba40b5
Use dedicated runtime for KafkaSinkConnector to ensure true parallelism.
hippalus Jan 2, 2025
ecbd655
Merge branch 'main' into kafka-integration
hippalus Jan 4, 2025
5c67134
add schema version when deserialize ParseableEvent
hippalus Jan 4, 2025
ce2fca0
rename Event as ParseableEvent
hippalus Jan 4, 2025
835e9b9
-v flag to clean up volumes when stopping containers. Remove the erro…
hippalus Jan 4, 2025
Merge remote-tracking branch 'origin' into kafka-integration
hippalus committed Dec 21, 2024
commit 06364b551fd94d9deb72f48acdb9d5dc0ff0b1a7
src/cli.rs: 56 changes (41 additions, 15 deletions)
@@ -27,7 +27,6 @@ use crate::connectors::common::types::ConnectorType;
 use crate::connectors::common::BadData;
 use crate::connectors::kafka::config::{ConsumerConfig, KafkaConfig, SourceOffset};
 use crate::{
-    kafka::SslProtocol,
     oidc::{self, OpenidConfig},
     option::{validation, Compression, Mode},
 };
@@ -51,9 +50,15 @@ pub struct Cli {
     pub domain_address: Option<Url>,

     /// The local staging path is used as a temporary landing point
-    /// for incoming events
+    /// for incoming events and local cache
     pub local_staging_path: PathBuf,

+    /// The local cache path is used for speeding up query on latest data
+    pub local_cache_path: Option<PathBuf>,
+
+    /// Size for local cache
+    pub local_cache_size: u64,
+
     /// Username for the basic authentication on the server
     pub username: String,

@@ -96,6 +101,12 @@ pub struct Cli {
     /// port use by airplane(flight query service)
     pub flight_port: u16,

+    /// to query cached data
+    pub query_cache_path: Option<PathBuf>,
+
+    /// Size for local cache
+    pub query_cache_size: u64,
+
     /// CORS behaviour
     pub cors: bool,

@@ -125,6 +136,10 @@ impl Cli {
     pub const ADDRESS: &'static str = "address";
     pub const DOMAIN_URI: &'static str = "origin";
     pub const STAGING: &'static str = "local-staging-path";
+    pub const CACHE: &'static str = "cache-path";
+    pub const QUERY_CACHE: &'static str = "query-cache-path";
+    pub const QUERY_CACHE_SIZE: &'static str = "query-cache-size";
+    pub const CACHE_SIZE: &'static str = "cache-size";
     pub const USERNAME: &'static str = "username";
     pub const PASSWORD: &'static str = "password";
     pub const CHECK_UPDATE: &'static str = "check-update";
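For readers unfamiliar with the pattern: these string constants are the long-form flag names that the CLI builder registers with clap and that the FromArgMatches implementation further below reads back out. A minimal sketch of that wiring, assuming the clap 4 builder API; the function name, environment variable, default value, and help texts here are illustrative assumptions, not taken from this PR:

use clap::{value_parser, Arg, Command};
use std::path::PathBuf;

// Hypothetical sketch: registering the cache flags named by the constants
// above. The real arg definitions are outside this condensed diff.
fn with_cache_args(cmd: Command) -> Command {
    cmd.arg(
        Arg::new("cache-path") // Cli::CACHE
            .long("cache-path")
            .env("P_CACHE_DIR") // assumed env var name; needs clap's "env" feature
            .value_parser(value_parser!(PathBuf))
            .help("Local path for caching data to speed up queries on recent events"),
    )
    .arg(
        Arg::new("cache-size") // Cli::CACHE_SIZE
            .long("cache-size")
            .value_parser(value_parser!(u64))
            // A default must exist, because FromArgMatches below calls
            // .expect("default value for cache size").
            .default_value("1073741824") // assumed default: 1 GiB, in bytes
            .help("Maximum size of the local cache, in bytes"),
    )
}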
@@ -891,13 +906,8 @@ impl FromArgMatches for Cli {
         self.trino_schema = m.get_one::<String>(Self::TRINO_SCHEMA).cloned();
         self.trino_username = m.get_one::<String>(Self::TRINO_USER_NAME).cloned();

-        self.kafka_topics = m.get_one::<String>(Self::KAFKA_TOPICS).cloned();
-        self.kafka_host = m.get_one::<String>(Self::KAFKA_HOST).cloned();
-        self.kafka_group = m.get_one::<String>(Self::KAFKA_GROUP).cloned();
-        self.kafka_client_id = m.get_one::<String>(Self::KAFKA_CLIENT_ID).cloned();
-        self.kafka_security_protocol = m.get_one::<SslProtocol>(Self::KAFKA_SECURITY_PROTOCOL).cloned();
-        self.kafka_partitions = m.get_one::<String>(Self::KAFKA_PARTITIONS).cloned();
-
+        self.local_cache_path = m.get_one::<PathBuf>(Self::CACHE).cloned();
+        self.query_cache_path = m.get_one::<PathBuf>(Self::QUERY_CACHE).cloned();
         self.tls_cert_path = m.get_one::<PathBuf>(Self::TLS_CERT).cloned();
         self.tls_key_path = m.get_one::<PathBuf>(Self::TLS_KEY).cloned();
         self.trusted_ca_certs_path = m.get_one::<PathBuf>(Self::TRUSTED_CA_CERTS_PATH).cloned();
@@ -917,6 +927,14 @@ impl FromArgMatches for Cli {
             .get_one::<PathBuf>(Self::STAGING)
             .cloned()
             .expect("default value for staging");
+        self.local_cache_size = m
+            .get_one::<u64>(Self::CACHE_SIZE)
+            .cloned()
+            .expect("default value for cache size");
+        self.query_cache_size = m
+            .get_one(Self::QUERY_CACHE_SIZE)
+            .cloned()
+            .expect("default value for query cache size");
         self.username = m
             .get_one::<String>(Self::USERNAME)
             .cloned()
@@ -959,12 +977,20 @@ impl FromArgMatches for Cli {
             .get_one::<usize>(Self::ROW_GROUP_SIZE)
             .cloned()
             .expect("default for row_group size");
-        self.parquet_compression = serde_json::from_str(&format!(
-            "{:?}",
-            m.get_one::<String>(Self::PARQUET_COMPRESSION_ALGO)
-                .expect("default for compression algo")
-        ))
-        .expect("unexpected compression algo");
+        self.parquet_compression = match m
+            .get_one::<String>(Self::PARQUET_COMPRESSION_ALGO)
+            .expect("default for compression algo")
+            .as_str()
+        {
+            "uncompressed" => Compression::UNCOMPRESSED,
+            "snappy" => Compression::SNAPPY,
+            "gzip" => Compression::GZIP,
+            "lzo" => Compression::LZO,
+            "brotli" => Compression::BROTLI,
+            "lz4" => Compression::LZ4,
+            "zstd" => Compression::ZSTD,
+            _ => unreachable!(),
+        };

         let openid_client_id = m.get_one::<String>(Self::OPENID_CLIENT_ID).cloned();
         let openid_client_secret = m.get_one::<String>(Self::OPENID_CLIENT_SECRET).cloned();
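A note on the compression change in the hunk above: the old code round-tripped the flag value through serde_json via a Debug-formatted string, which was indirect and panicked on any representation mismatch; the new explicit match is direct, and the unreachable!() arm is justified because clap restricts the flag to the listed values. A standalone sketch of the same idea, using a hypothetical stand-in enum (the real Compression is imported from crate::option and uses upper-case variants):

// Sketch: match-based parsing of a compression name, kept total by
// returning Option instead of unreachable!(). The enum is a stand-in.
#[derive(Debug, PartialEq)]
enum Compression {
    Uncompressed,
    Snappy,
    Gzip,
    Lzo,
    Brotli,
    Lz4,
    Zstd,
}

fn parse_compression(s: &str) -> Option<Compression> {
    match s {
        "uncompressed" => Some(Compression::Uncompressed),
        "snappy" => Some(Compression::Snappy),
        "gzip" => Some(Compression::Gzip),
        "lzo" => Some(Compression::Lzo),
        "brotli" => Some(Compression::Brotli),
        "lz4" => Some(Compression::Lz4),
        "zstd" => Some(Compression::Zstd),
        _ => None, // the CLI layer never hits this: clap validates the value set
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_known_and_rejects_unknown() {
        assert_eq!(parse_compression("zstd"), Some(Compression::Zstd));
        assert_eq!(parse_compression("bogus"), None);
    }
}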
Note: This is a condensed view of this merge commit; the full changes are not shown here.