Skip to content

Commit

Permalink
Add view_url for DagBundles (apache#45126)
Browse files Browse the repository at this point in the history
This PR adds view_url to Dagbundles to enable viewing the bundle's
version
  • Loading branch information
ephraimbuddy authored Jan 8, 2025
1 parent 39d7f1c commit adbe4e2
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 0 deletions.
8 changes: 8 additions & 0 deletions airflow/dag_processing/bundles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,11 @@ def get_current_version(self) -> str | None:
@abstractmethod
def refresh(self) -> None:
"""Retrieve the latest version of the files in the bundle."""

def view_url(self, version: str | None = None) -> str | None:
"""
URL to view the bundle.
:param version: Version to view
:return: URL to view the bundle
"""
29 changes: 29 additions & 0 deletions airflow/dag_processing/bundles/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import os
from typing import TYPE_CHECKING
from urllib.parse import urlparse

from git import Repo
from git.exc import BadName
Expand Down Expand Up @@ -126,3 +127,31 @@ def refresh(self) -> None:

self.bare_repo.remotes.origin.fetch("+refs/heads/*:refs/heads/*")
self.repo.remotes.origin.pull()

def _convert_git_ssh_url_to_https(self) -> str:
if not self.repo_url.startswith("git@"):
raise ValueError(f"Invalid git SSH URL: {self.repo_url}")
parts = self.repo_url.split(":")
domain = parts[0].replace("git@", "https://")
repo_path = parts[1].replace(".git", "")
return f"{domain}/{repo_path}"

def view_url(self, version: str | None = None) -> str | None:
if not version:
return None
url = self.repo_url
if url.startswith("git@"):
url = self._convert_git_ssh_url_to_https()
parsed_url = urlparse(url)
host = parsed_url.hostname
if not host:
return None
host_patterns = {
"github.com": f"{url}/tree/{version}",
"gitlab.com": f"{url}/-/tree/{version}",
"bitbucket.org": f"{url}/src/{version}",
}
for allowed_host, template in host_patterns.items():
if host == allowed_host or host.endswith(f".{allowed_host}"):
return template
return None
4 changes: 4 additions & 0 deletions airflow/dag_processing/bundles/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,7 @@ def get_all_dag_bundles(self) -> Iterable[BaseDagBundle]:
"""
for name, (class_, kwargs) in self._bundle_config.items():
yield class_(name=name, version=None, **kwargs)

def view_url(self, name: str, version: str | None = None) -> str | None:
bundle = self.get_bundle(name, version)
return bundle.view_url(version=version)
11 changes: 11 additions & 0 deletions tests/dag_processing/bundles/test_dag_bundle_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from airflow.models.dagbundle import DagBundleModel
from airflow.utils.session import create_session

from tests_common.test_utils.config import conf_vars
from tests_common.test_utils.db import clear_db_dag_bundles


Expand Down Expand Up @@ -156,3 +157,13 @@ def _get_bundle_names_and_active():
manager = DagBundlesManager()
manager.sync_bundles_to_db()
assert _get_bundle_names_and_active() == [("dags-folder", False), ("my-test-bundle", True)]


@conf_vars({("dag_bundles", "backends"): json.dumps(BASIC_BUNDLE_CONFIG)})
@pytest.mark.parametrize("version", [None, "hello"])
def test_view_url(version):
"""Test that view_url calls the bundle's view_url method."""
bundle_manager = DagBundlesManager()
with patch.object(BaseDagBundle, "view_url") as view_url_mock:
bundle_manager.view_url("my-test-bundle", version=version)
view_url_mock.assert_called_once_with(version=version)
35 changes: 35 additions & 0 deletions tests/dag_processing/test_dag_bundles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import tempfile
from pathlib import Path
from unittest import mock

import pytest
from git import Repo
Expand Down Expand Up @@ -265,3 +266,37 @@ def test_subdir(self, git_repo):
files_in_repo = {f.name for f in bundle.path.iterdir() if f.is_file()}
assert str(bundle.path).endswith(subdir)
assert {"some_new_file.py"} == files_in_repo

@pytest.mark.parametrize(
"repo_url, expected_url",
[
("[email protected]:apache/airflow.git", "https://github.com/apache/airflow/tree/0f0f0f"),
("[email protected]:apache/airflow.git", "https://gitlab.com/apache/airflow/-/tree/0f0f0f"),
("[email protected]:apache/airflow.git", "https://bitbucket.org/apache/airflow/src/0f0f0f"),
(
"[email protected]:apache/airflow.git",
"https://myorg.github.com/apache/airflow/tree/0f0f0f",
),
],
)
@mock.patch("airflow.dag_processing.bundles.git.Repo")
def test_view_url(self, mock_gitrepo, repo_url, expected_url):
bundle = GitDagBundle(
name="test",
refresh_interval=300,
repo_url=repo_url,
tracking_ref="main",
)
view_url = bundle.view_url("0f0f0f")
assert view_url == expected_url

@mock.patch("airflow.dag_processing.bundles.git.Repo")
def test_view_url_returns_none_when_no_version_in_view_url(self, mock_gitrepo):
bundle = GitDagBundle(
name="test",
refresh_interval=300,
repo_url="[email protected]:apache/airflow.git",
tracking_ref="main",
)
view_url = bundle.view_url(None)
assert view_url is None

0 comments on commit adbe4e2

Please sign in to comment.