Skip to content

Commit

Permalink
feat(powerbi): Report to Dashboard lineage (#12451)
Browse files Browse the repository at this point in the history
  • Loading branch information
sgomezvillamor authored Feb 5, 2025
1 parent ac13f25 commit 32b654c
Show file tree
Hide file tree
Showing 20 changed files with 188 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class Constant:
ACTIVE = "Active"
SQL_PARSING_FAILURE = "SQL Parsing Failure"
M_QUERY_NULL = '"null"'
REPORT_WEB_URL = "reportWebUrl"


@dataclass
Expand Down
31 changes: 28 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,11 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict:
if tile.dataset is not None and tile.dataset.webUrl is not None:
custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl

if tile.report is not None and tile.report.id is not None:
custom_properties[Constant.REPORT_ID] = tile.report.id
if tile.report_id is not None:
custom_properties[Constant.REPORT_ID] = tile.report_id

if tile.report is not None and tile.report.webUrl is not None:
custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl

return custom_properties

Expand Down Expand Up @@ -1053,6 +1056,7 @@ def report_to_dashboard(
report: powerbi_data_classes.Report,
chart_mcps: List[MetadataChangeProposalWrapper],
user_mcps: List[MetadataChangeProposalWrapper],
dashboard_edges: List[EdgeClass],
) -> List[MetadataChangeProposalWrapper]:
"""
Map PowerBi report to Datahub dashboard
Expand All @@ -1074,6 +1078,7 @@ def report_to_dashboard(
charts=chart_urn_list,
lastModified=ChangeAuditStamps(),
dashboardUrl=report.webUrl,
dashboards=dashboard_edges,
)

info_mcp = self.new_mcp(
Expand Down Expand Up @@ -1167,8 +1172,28 @@ def report_to_datahub_work_units(
ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)

# find all dashboards with a Tile referencing this report
downstream_dashboards_edges = []
for d in workspace.dashboards.values():
if any(t.report_id == report.id for t in d.tiles):
dashboard_urn = builder.make_dashboard_urn(
platform=self.__config.platform_name,
platform_instance=self.__config.platform_instance,
name=d.get_urn_part(),
)
edge = EdgeClass(
destinationUrn=dashboard_urn,
sourceUrn=None,
created=None,
lastModified=None,
properties=None,
)
downstream_dashboards_edges.append(edge)

# Let's convert report to datahub dashboard
report_mcps = self.report_to_dashboard(workspace, report, chart_mcps, user_mcps)
report_mcps = self.report_to_dashboard(
workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
)

# Now add MCPs in sequence
mcps.extend(ds_mcps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,15 @@ class CreatedFrom(Enum):
id: str
title: str
embedUrl: str
dataset: Optional["PowerBIDataset"]
dataset_id: Optional[str]
report: Optional[Report]
report_id: Optional[str]
createdFrom: CreatedFrom

# In a first pass, `dataset_id` and/or `report_id` are filled in.
# In a subsequent pass, the objects are populated.
dataset: Optional["PowerBIDataset"]
report: Optional[Report]

def get_urn_part(self):
    """Return the URN fragment for this object: "charts." followed by its id."""
    return "charts.{}".format(self.id)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,41 +337,6 @@ def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]:
-tiles), there is no information available on pagination
"""

def new_dataset_or_report(tile_instance: Any) -> dict:
    """
    Classify where a tile's data comes from and resolve its report, if any.

    `self` and `workspace` are taken from the enclosing `get_tiles` scope.

    Returns a dict with two entries:
      * Constant.REPORT: the result of `self.get_report(...)` when the tile
        carries a report id, otherwise None.
      * Constant.CREATED_FROM: the matching `Tile.CreatedFrom` member.
    """
    report_id = tile_instance.get(Constant.REPORT_ID)

    # The report id is checked first, so it wins over a dataset id.
    # A tile carrying neither id is classified as created from a visualization.
    if report_id is not None:
        linked_report = self.get_report(workspace=workspace, report_id=report_id)
        origin = Tile.CreatedFrom.REPORT
    elif tile_instance.get(Constant.DATASET_ID) is not None:
        linked_report = None
        origin = Tile.CreatedFrom.DATASET
    else:
        linked_report = None
        origin = Tile.CreatedFrom.VISUALIZATION

    tile_title: Optional[str] = tile_instance.get(Constant.TITLE)
    tile_id: Optional[str] = tile_instance.get(Constant.ID)
    logger.info(f"Tile {tile_title}({tile_id}) is created from {origin}")

    return {Constant.REPORT: linked_report, Constant.CREATED_FROM: origin}

tile_list_endpoint: str = self.get_tiles_endpoint(
workspace, dashboard_id=dashboard.id
)
Expand All @@ -393,8 +358,18 @@ def new_dataset_or_report(tile_instance: Any) -> dict:
title=instance.get(Constant.TITLE),
embedUrl=instance.get(Constant.EMBED_URL),
dataset_id=instance.get(Constant.DATASET_ID),
report_id=instance.get(Constant.REPORT_ID),
dataset=None,
**new_dataset_or_report(instance),
report=None,
createdFrom=(
# In the past we considered that only one of the two report_id or dataset_id would be present
# but we have seen cases where both are present. If both are present, we prioritize the report.
Tile.CreatedFrom.REPORT
if instance.get(Constant.REPORT_ID)
else Tile.CreatedFrom.DATASET
if instance.get(Constant.DATASET_ID)
else Tile.CreatedFrom.VISUALIZATION
),
)
for instance in tile_dict
if instance is not None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,13 +625,26 @@ def fill_dashboards() -> None:
dashboard.tiles = self._get_resolver().get_tiles(
workspace, dashboard=dashboard
)
# set the dataset for tiles
# set the dataset and the report for tiles
for tile in dashboard.tiles:
# In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
# https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
if tile.report_id:
tile.report = workspace.reports.get(tile.report_id)
if tile.report is None:
self.reporter.info(
title="Missing Report Lineage For Tile",
message="A Report reference that failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
)
# However, semantic models (aka datasets) can be shared across workspaces
# https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
# That's why the global 'dataset_registry' is required
if tile.dataset_id:
tile.dataset = self.dataset_registry.get(tile.dataset_id)
if tile.dataset is None:
self.reporter.info(
title="Missing Lineage For Tile",
title="Missing Dataset Lineage For Tile",
message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
)
Expand All @@ -653,10 +666,10 @@ def fill_dashboard_tags() -> None:
for dashboard in workspace.dashboards.values():
dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])

# fill reports first since some dashboard may reference a report
fill_reports()
if self.__config.extract_dashboards:
fill_dashboards()

fill_reports()
fill_dashboard_tags()
self._fill_independent_datasets(workspace=workspace)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@
"aspect": {
"json": {
"customProperties": {
"createdFrom": "Dataset",
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
"createdFrom": "Report",
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"reportId": "5b218778-e7a5-4d73-8187-f10824047715"
},
"title": "test_tile",
"description": "test_tile",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@
"aspect": {
"json": {
"customProperties": {
"createdFrom": "Dataset",
"createdFrom": "Report",
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details"
"datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
"reportId": "5b218778-e7a5-4d73-8187-f10824047715"
},
"title": "test_tile",
"description": "test_tile",
Expand Down
Loading

0 comments on commit 32b654c

Please sign in to comment.