Skip to content

Commit

Permalink
feat: A new internal lookup (vectordotdev#4066)
Browse files Browse the repository at this point in the history
* wip

Signed-off-by: Ana Hobden <[email protected]>

* Refine new lookup a bunch, handle indexes

Signed-off-by: Ana Hobden <[email protected]>

* Better logging

Signed-off-by: Ana Hobden <[email protected]>

* Better logging

Signed-off-by: Ana Hobden <[email protected]>

* wip

Signed-off-by: Ana Hobden <[email protected]>

* add files I forgot

Signed-off-by: Ana Hobden <[email protected]>

* More

Signed-off-by: Ana Hobden <[email protected]>

* Fixup

Signed-off-by: Ana Hobden <[email protected]>

* I MADE AN ICE

Signed-off-by: Ana Hobden <[email protected]>

* Start adding to add_fields

Signed-off-by: Ana Hobden <[email protected]>

* Integrate with add_fields

Signed-off-by: Ana Hobden <[email protected]>

* Add fixture tests

Signed-off-by: Ana Hobden <[email protected]>

* Add a test

Signed-off-by: Ana Hobden <[email protected]>

* Add serializing and deserializing

Signed-off-by: Ana Hobden <[email protected]>

* add some benches

Signed-off-by: Ana Hobden <[email protected]>

* Finalize benches

Signed-off-by: Ana Hobden <[email protected]>

* fmt

Signed-off-by: Ana Hobden <[email protected]>

* Handle rename

Signed-off-by: Ana Hobden <[email protected]>

* Fix lints

Signed-off-by: Ana Hobden <[email protected]>

* Update RFC

Signed-off-by: Ana Hobden <[email protected]>

* Fix tests

Signed-off-by: Ana Hobden <[email protected]>

* Add a bit more API

Signed-off-by: Ana Hobden <[email protected]>

* Comments

Signed-off-by: Ana Hobden <[email protected]>

* Fixup

Signed-off-by: Ana Hobden <[email protected]>

* fmt

Signed-off-by: Ana Hobden <[email protected]>

* touchups

Signed-off-by: Ana Hobden <[email protected]>

* Clean up some lints

Signed-off-by: Ana Hobden <[email protected]>

* Fmt

Signed-off-by: Ana Hobden <[email protected]>

* Fixups of clippy

Signed-off-by: Ana Hobden <[email protected]>

* fmt

Signed-off-by: Ana Hobden <[email protected]>

* Lint fixups

Signed-off-by: Ana Hobden <[email protected]>

* Fixup

Signed-off-by: Ana Hobden <[email protected]>
  • Loading branch information
Hoverbear authored Sep 30, 2020
1 parent f227f49 commit b031ead
Show file tree
Hide file tree
Showing 41 changed files with 983 additions and 214 deletions.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,11 @@ harness = false
name = "isolated_buffering"
harness = false

[[bench]]
name = "lookup"
harness = false


[[bench]]
name = "wasm"
harness = false
Expand Down
8 changes: 4 additions & 4 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,11 @@ fn benchmark_remap(c: &mut Criterion) {

c.bench_function("remap: add fields with add_fields", |b| {
let mut fields = IndexMap::new();
fields.insert("foo".into(), "bar".into());
fields.insert("bar".into(), "baz".into());
fields.insert("copy".into(), "{{ copy_from }}".into());
fields.insert("foo".into(), String::from("bar").into());
fields.insert("bar".into(), String::from("baz").into());
fields.insert("copy".into(), String::from("{{ copy_from }}").into());

let tform = AddFields::new(fields, true);
let tform = AddFields::new(fields, true).unwrap();

b.iter(add_fields_runner(Box::new(tform)))
});
Expand Down
121 changes: 121 additions & 0 deletions benches/lookup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use indexmap::map::IndexMap;
use std::convert::TryFrom;
use std::{fs, io::Read, path::Path};
use vector::event::Lookup;

const FIXTURE_ROOT: &str = "tests/data/fixtures/lookup";

fn parse_artifact(path: impl AsRef<Path>) -> std::io::Result<String> {
let mut test_file = match fs::File::open(path) {
Ok(file) => file,
Err(e) => return Err(e),
};

let mut buf = Vec::new();
test_file.read_to_end(&mut buf)?;
let string = String::from_utf8(buf).unwrap();
Ok(string)
}

// This test iterates over the `tests/data/fixtures/lookup` folder and ensures the lookup parsed,
// then turned into a string again is the same.
fn lookup_to_string(c: &mut Criterion) {
vector::test_util::trace_init();
let mut fixtures = IndexMap::new();

std::fs::read_dir(FIXTURE_ROOT)
.unwrap()
.for_each(|fixture_file| match fixture_file {
Ok(fixture_file) => {
let path = fixture_file.path();
tracing::trace!(?path, "Opening.");
let buf = parse_artifact(&path).unwrap();
fixtures.insert(path, buf);
}
_ => panic!("This test should never read Err'ing test fixtures."),
});

let mut group_from_elem = c.benchmark_group("from_string");
for (_path, fixture) in fixtures.iter() {
group_from_elem.throughput(Throughput::Bytes(fixture.clone().into_bytes().len() as u64));
group_from_elem.bench_with_input(
BenchmarkId::from_parameter(&fixture),
&fixture.clone(),
move |b, ref param| {
let input = &(*param).clone();
b.iter_with_setup(
|| input.clone(),
|input| {
let lookup = Lookup::try_from(input).unwrap();
black_box(lookup)
},
)
},
);
}
group_from_elem.finish();

let mut group_to_string = c.benchmark_group("to_string");
for (_path, fixture) in fixtures.iter() {
group_to_string.throughput(Throughput::Bytes(fixture.clone().into_bytes().len() as u64));
group_to_string.bench_with_input(
BenchmarkId::from_parameter(&fixture),
&fixture.clone(),
move |b, ref param| {
let input = &(*param).clone();
b.iter_with_setup(
|| Lookup::try_from(input.clone()).unwrap(),
|input| {
let string = input.to_string();
black_box(string)
},
)
},
);
}
group_to_string.finish();

let mut group_serialize = c.benchmark_group("serialize");
for (_path, fixture) in fixtures.iter() {
group_serialize.throughput(Throughput::Bytes(fixture.clone().into_bytes().len() as u64));
group_serialize.bench_with_input(
BenchmarkId::from_parameter(&fixture),
&fixture.clone(),
move |b, ref param| {
let input = &(*param).clone();
b.iter_with_setup(
|| Lookup::try_from(input.clone()).unwrap(),
|input| {
let string = serde_json::to_string(&input);
black_box(string)
},
)
},
);
}
group_serialize.finish();

let mut group_deserialize = c.benchmark_group("deserialize");
for (_path, fixture) in fixtures.iter() {
group_deserialize.throughput(Throughput::Bytes(fixture.clone().into_bytes().len() as u64));
group_deserialize.bench_with_input(
BenchmarkId::from_parameter(&fixture),
&fixture.clone(),
move |b, ref param| {
let input = &(*param).clone();
b.iter_with_setup(
|| serde_json::to_string(&Lookup::try_from(input.clone()).unwrap()).unwrap(),
|input| {
let lookup: Lookup = serde_json::from_str(&input).unwrap();
black_box(lookup)
},
)
},
);
}
group_deserialize.finish();
}

criterion_group!(lookup, lookup_to_string);
criterion_main!(lookup);
9 changes: 4 additions & 5 deletions benches/lua.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use bytes::Bytes;
use criterion::{criterion_group, Benchmark, Criterion};
use indexmap::IndexMap;
use std::str::FromStr;
use transforms::lua::v2::LuaConfig;
use vector::{
config::{TransformConfig, TransformContext},
event::Lookup,
transforms::{self, Transform},
Event,
};
Expand All @@ -27,11 +29,8 @@ fn add_fields(c: &mut Criterion) {
b.iter_with_setup(
|| {
let mut map = IndexMap::new();
map.insert(
key.to_string().into(),
toml::value::Value::String(value.to_string()),
);
transforms::add_fields::AddFields::new(map, true)
map.insert(Lookup::from_str(key).unwrap(), String::from(value).into());
transforms::add_fields::AddFields::new(map, true).unwrap()
},
|mut transform| {
for _ in 0..num_events {
Expand Down
16 changes: 8 additions & 8 deletions rfcs/2020-05-25-2692-more-usable-logevents.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ Before we can fully grip at our internal `LogEvent` type, let's review a particu

### Dogma and Intent

Vector has embraced the idea of **Paths**, which are analogous to CSS Selectors or `jq` selectors, that can represent an arbitrary selection of value(s). It is irrelevant to the user how the `LogEvent` is internally structured, so long as they can utilize these paths to work with a `LogEvent`.
Vector has embraced the idea of **Lookups**, which are analogous to CSS Selectors or `jq` selectors, that can represent an arbitrary selection of value(s). It is irrelevant to the user how the `LogEvent` is internally structured, so long as they can utilize these Lookups to work with a `LogEvent`.

The `LogEvent` variant used under a two primary intents:

* **Programmatically, through our internal code.** When used in this way, the `LogEvent` is being interacted with through static, safe interfaces and we can utilize the full type system available to us.
* **Through configuration or FFI via knobs or scripts.** When used this way, the configuration format (at this time, TOML) may impact how aesthetic or expressive our configuration is. For example, TOML values like `a.b.c` as defined in a configuration. This is TOML table syntax, and we can't just magically treat it like a `Path`, a conversion step is necessary.
* **Through configuration or FFI via knobs or scripts.** When used this way, the configuration format (at this time, TOML) may impact how aesthetic or expressive our configuration is. For example, TOML values like `a.b.c` as defined in a configuration. This is TOML table syntax, and we can't just magically treat it like a `Lookup`, a conversion step is necessary.

### Understanding `LogEvent` in Vector 0.9.0

Expand Down Expand Up @@ -183,14 +183,14 @@ In the [WASM transform](https://github.com/timberio/vector/pull/2006/files) our
This RFC ultimately proposes the following steps:

1. Add UX improvements on `LogEvent`, particularly turning JSON into or from `LogEvent`.
1. Refactor the `PathIter` to make `vector::Event::Path` type.
1. Add UX improvements on `Path` , particularly an internal `String``Path` with an `Into`/`From` that does not do path parsing, as well as a `Path::parse(s: String)` that does.
1. Refactor all `LogEvent` to accept `Into<Path>` values.
1. Remove obsolete functionality like `insert_path` since the new `Path` type covers this.
2. Refactor the `keys` function to return an `Iterator<Path>`
1. Refactor the `PathIter` to make `vector::event::Lookup` type.
1. Add UX improvements on `Lookup` , particularly an internal `String``Lookup` with an `Into`/`From` that does not do path parsing, as well as a `<Lookup as std::str::FromStr>::from_str(s: String)` that does. (This also enables `"foo.bar".parse::<Lookup>()?`)
1. Refactor all `LogEvent` to accept `Into<Lookup>` values.
1. Remove obsolete functionality like `insert_path` since the new `Lookup` type covers this.
2. Refactor the `keys` function to return an `Iterator<Lookup>`
1. Add an `Entry` style API to `LogEvent`.
1. Remove functionality rendered obsolete by the Entry API like `try_insert`, moving them to use the new Entry API
1. Provide `iter` and `iter_mut` functions that yield `(Path, Value)`.
1. Provide `iter` and `iter_mut` functions that yield `(Lookup, Value)`.
1. Remove the `all_fields` function, moving them to the new iterator.

We believe these steps will provide a more ergonomic and consistent API.
Expand Down
8 changes: 4 additions & 4 deletions src/config/unit_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,10 @@ fn build_input(config: &Config, input: &TestInput) -> Result<(Vec<String>, Event
let mut event = Event::from("");
for (path, value) in log_fields {
let value: Value = match value {
TestInputValue::String(s) => s.as_bytes().to_vec().into(),
TestInputValue::Boolean(b) => (*b).into(),
TestInputValue::Integer(i) => (*i).into(),
TestInputValue::Float(f) => (*f).into(),
TestInputValue::String(s) => Value::from(s.to_owned()),
TestInputValue::Boolean(b) => Value::from(*b),
TestInputValue::Integer(i) => Value::from(*i),
TestInputValue::Float(f) => Value::from(*f),
};
event.as_mut_log().insert(path.to_owned(), value);
}
Expand Down
11 changes: 6 additions & 5 deletions src/event/log_event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ impl LogEvent {
util::log::get_mut(&mut self.fields, key)
}

pub fn contains(&self, key: &DefaultAtom) -> bool {
util::log::contains(&self.fields, key)
pub fn contains(&self, key: impl AsRef<str>) -> bool {
util::log::contains(&self.fields, key.as_ref())
}

pub fn insert<K, V>(&mut self, key: K, value: V) -> Option<Value>
Expand Down Expand Up @@ -63,8 +63,8 @@ impl LogEvent {
util::log::remove(&mut self.fields, &key, false)
}

pub fn remove_prune(&mut self, key: &DefaultAtom, prune: bool) -> Option<Value> {
util::log::remove(&mut self.fields, &key, prune)
pub fn remove_prune(&mut self, key: impl AsRef<str>, prune: bool) -> Option<Value> {
util::log::remove(&mut self.fields, key.as_ref(), prune)
}

pub fn keys<'a>(&'a self) -> impl Iterator<Item = String> + 'a {
Expand Down Expand Up @@ -138,7 +138,8 @@ impl std::ops::Index<&DefaultAtom> for LogEvent {
type Output = Value;

fn index(&self, key: &DefaultAtom) -> &Value {
self.get(key).expect("Key is not found")
self.get(key)
.expect(&*format!("Key is not found: {:?}", key))
}
}

Expand Down
Loading

0 comments on commit b031ead

Please sign in to comment.