Skip to content

Commit

Permalink
Add feature definitions and list_features function (fal-ai#84)
Browse files Browse the repository at this point in the history
* Add feature helper

* Add list_features method to FalDbt

* Add list_features magic function

* Remove entity_id and timestamp from features

* Add list_features test

* Fix feature store import

* Move feature_store under fal

* PR comments
  • Loading branch information
mederka authored Dec 23, 2021
1 parent 9622f85 commit 307d72f
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/fal/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def exec(self, context, faldbt: FalDbt):
"list_models": faldbt.list_models,
"list_models_ids": faldbt.list_models_ids,
"list_sources": faldbt.list_sources,
"list_features": faldbt.list_features
},
)

Expand Down
Empty file.
17 changes: 17 additions & 0 deletions src/fal/feature_store/feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Classes and functions for managing features."""
from dataclasses import dataclass


@dataclass
class Feature:
    """One feature, i.e. a single column of a dbt model.

    The positional order of the fields below is part of the constructor
    signature: ``Feature(model, column, entity_id, timestamp, description)``.
    """

    # Name of the dbt model the column belongs to.
    model: str
    # Name of the column that carries the feature values.
    column: str
    # Entity id associated with the feature (stored as given by the caller).
    entity_id: str
    # Timestamp associated with the feature (stored as given by the caller).
    timestamp: str
    # Free-text description of the feature.
    description: str

    def get_name(self) -> str:
        """Return the generated name ``<model>.<column>`` for this feature."""
        return "{}.{}".format(self.model, self.column)
34 changes: 34 additions & 0 deletions src/faldbt/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from . import parse
from . import lib
from fal.feature_store.feature import Feature

import firebase_admin
from firebase_admin import firestore
Expand Down Expand Up @@ -101,6 +102,7 @@ class FalDbt:
project_dir: str
profiles_dir: str
keyword: str
features: List[Feature]

_config: RuntimeConfig
_manifest: DbtManifest
Expand Down Expand Up @@ -147,6 +149,8 @@ def __init__(self, project_dir: str,

self._setup_firestore()

self.features = self._find_features()

def get_model_status(self, unique_id: str):
# Default to `skipped` status if not found, it means it did not run
return self._model_status_map.get(unique_id, "skipped")
Expand Down Expand Up @@ -181,6 +185,36 @@ def list_models(self) -> List[DbtModel]:
models.append(model)
return models

def list_features(self) -> List[Feature]:
    """Return the feature list held in ``self.features``.

    The same list object is handed back on every call; no copy is made.
    """
    return self.features

def _find_features(self) -> List[Feature]:
"""List features defined in schema.yml files."""
keyword = self.keyword
models = self.list_models()
models = list(filter(
# Find models that have both feature store and column defs
lambda model: keyword in model.meta
and 'feature_store' in model.meta[keyword]
and len(list(model.columns.keys())) > 0, models))
features = []
for model in models:
for column_name in model.columns.keys():
if column_name == model.meta[keyword]['feature_store']['entity_id']:
continue
if column_name == model.meta[keyword]['feature_store']['timestamp']:
continue
features.append(
Feature(
model=model.name,
column=column_name,
description=model.columns[column_name].description,
entity_id=model.meta[keyword]['feature_store']['entity_id'],
timestamp=model.meta[keyword]['feature_store']['timestamp']
)
)
return features

def ref(
self, target_model_name: str, target_package_name: Optional[str] = None
) -> pd.DataFrame:
Expand Down
7 changes: 7 additions & 0 deletions tests/mock/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@ models:
- fal_scripts/test.py
- name: agent_wait_time
description: Agent wait time series
columns:
- name: a
- name: b
- name: c
config:
materialized: table
meta:
fal:
feature_store:
entity_id: a
timestamp: b
scripts:
# - fal_scripts/test.py

Expand Down
7 changes: 7 additions & 0 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@ def test_scripts():
assert 0 == len(model.get_scripts("fal"))
if model.name == "zendesk_ticket_data":
assert 1 == len(model.get_scripts("fal"))

# Feature definitions
features = faldbt.list_features()

assert 1 == len(features)
assert "a" == features[0].entity_id
assert "agent_wait_time" == features[0].model

0 comments on commit 307d72f

Please sign in to comment.