Skip to content

Commit

Permalink
[BE] Clean up trymerge code handling flaky failures (pytorch#110133)
Browse files Browse the repository at this point in the history
This is the 2nd part of pytorch#110054.  The flaky classification has been done on Dr.CI.  There is no need to download flaky rule files and do the check anymore.  Some tests are also updated with new examples because we mocked the list of flaky rules there.  Similar tests have been done on Dr.CI.

* [x] pytorch#110054
* [x] Clean up the flaky rules logic because it has already been implemented on Dr.CI
* [ ] Clean up the broken trunk logic for the same reason

Pull Request resolved: pytorch#110133
Approved by: https://github.com/clee2000
  • Loading branch information
huydhn authored and pytorchmergebot committed Sep 30, 2023
1 parent f7ba3e8 commit 81a7445
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 116 deletions.
Binary file modified .github/scripts/drci_mocks.json.gz
Binary file not shown.
Binary file modified .github/scripts/gql_mocks.json.gz
Binary file not shown.
Binary file modified .github/scripts/rockset_mocks.json.gz
Binary file not shown.
120 changes: 47 additions & 73 deletions .github/scripts/test_trymerge.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from trymerge import (
categorize_checks,
find_matching_merge_rule,
FlakyRule,
get_classifications,
get_drci_classifications,
get_rockset_results,
Expand Down Expand Up @@ -226,16 +225,6 @@ def mocked_read_merge_rules_raise(repo: Any, org: str, project: str) -> List[Mer
raise RuntimeError("testing")


def empty_flaky_rules() -> List[FlakyRule]:
return []


def xla_is_flaky_rules() -> List[FlakyRule]:
return [
FlakyRule("xla", ["FAILED: Build did NOT complete successfully"]),
]


def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
return [
MergeRule(
Expand All @@ -247,6 +236,7 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
"EasyCLA",
"pull / linux-bionic-py3_8-clang8-xla / build",
"pull / linux-bionic-py3_8-clang8-xla / test (xla, 1, 1, linux.4xlarge)",
"inductor / cuda11.8-py3.10-gcc7-sm86 / test (inductor_torchbench_dynamic, 1, 1, linux.g5.4xlarge.nvidia.gpu)",
],
ignore_flaky_failures=False,
),
Expand All @@ -268,7 +258,6 @@ def commit_message(self, ref: str) -> str:
return "super awsome commit message"


@mock.patch("trymerge.read_flaky_rules", side_effect=empty_flaky_rules)
@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
Expand Down Expand Up @@ -677,55 +666,74 @@ def test_get_merge_base(self, *args: Any) -> None:
)
class TestBypassFailures(TestCase):
def test_get_classifications(self, *args: Any) -> None:
flaky_rules = [
# Try a regex rule
FlakyRule("distributed", ["##\\[error\\]The operation [wW]as .+"])
]
pr = GitHubPR("pytorch", "pytorch", 92863)
pr = GitHubPR("pytorch", "pytorch", 109584)
checks = pr.get_checkrun_conclusions()
checks = get_classifications(
pr.pr_num,
pr.project,
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
flaky_rules,
[],
)
self.assertTrue(
checks[
"pull / linux-bionic-py3_7-clang8-xla / test (xla, 1, 1, linux.4xlarge)"
"pull / linux-focal-py3.11-clang10 / test (dynamo, 1, 2, linux.2xlarge)"
].classification
== "BROKEN_TRUNK"
)
self.assertTrue(
checks[
"trunk / win-vs2019-cpu-py3 / test (default, 2, 3, windows.4xlarge.nonephemeral)"
].classification
== "BROKEN_TRUNK"
)
self.assertTrue(
checks[
"pull / linux-focal-py3.7-gcc7 / test (distributed, 1, 2, linux.2xlarge)"
"pull / linux-jammy-py3.8-gcc11 / test (distributed, 1, 2, linux.2xlarge)"
].classification
== "BROKEN_TRUNK"
)
self.assertTrue(
checks[
"pull / linux-focal-cuda11.8-py3.10-gcc9 / test (distributed, 1, 3, linux.8xlarge.nvidia.gpu)"
].classification
== "FLAKY"
)

# Set the threshold larger or equal to the number of ok failures
pending, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=2
checks, list(checks.keys()), ok_failed_checks_threshold=6
)
self.assertTrue(len(pending) == 0)
self.assertTrue(len(failed) == 0)
self.assertTrue(len(ignorable["FLAKY"]) == 1)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
self.assertTrue(len(ignorable["FLAKY"]) == 2)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 4)

# Not set any threshold, defaults to -1 to ignore all flaky and broken trunk failures
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
self.assertTrue(len(pending) == 0)
self.assertTrue(len(failed) == 0)
self.assertTrue(len(ignorable["FLAKY"]) == 1)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
self.assertTrue(len(ignorable["FLAKY"]) == 2)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 4)

# Set the threshold lower than the number of ok failures
pending, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=1
)
self.assertTrue(len(pending) == 0)
self.assertTrue(len(failed) == 2)
self.assertTrue(len(ignorable["FLAKY"]) == 1)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
self.assertTrue(len(failed) == 6)
self.assertTrue(len(ignorable["FLAKY"]) == 2)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 4)

# Set the threshold to 0 like when ignore_flaky_failures is on
pending, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=1
)
self.assertTrue(len(pending) == 0)
self.assertTrue(len(failed) == 6)
self.assertTrue(len(ignorable["FLAKY"]) == 2)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 4)

def test_get_classifications_similar_failures(self, *args: Any) -> None:
pr = GitHubPR("pytorch", "pytorch", 109750)
Expand All @@ -737,7 +745,6 @@ def test_get_classifications_similar_failures(self, *args: Any) -> None:
pr.last_commit()["oid"],
pr.get_merge_base(),
[],
[],
)
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
self.assertTrue(len(pending) == 0)
Expand All @@ -754,7 +761,6 @@ def test_get_classifications_unstable(self, *args: Any) -> None:
pr.last_commit()["oid"],
pr.get_merge_base(),
[],
[],
)
workflow_name = "linux-bionic-cuda12.1-py3.10-gcc9-bazel-test"
job_name = "build-and-test (default, 1, 1, linux.4xlarge.nvidia.gpu, unstable)"
Expand Down Expand Up @@ -812,7 +818,6 @@ def test_get_classifications_broken_trunk(self, *args: Any) -> None:
pr.last_commit()["oid"],
pr.get_merge_base(),
[],
[],
)

pending, failed, _ = categorize_checks(checks, list(checks.keys()))
Expand All @@ -832,35 +837,28 @@ def test_ignore_current(self, *args: Any) -> None:
# current checks takes place after other classifications: flaky, unstable,
# or broken trunk. Only actual new failures should be kept in the list of
# ignore current checks to use to record force merge with actual failures
flaky_rules = [
FlakyRule("distributed", ["##\\[error\\]The operation was canceled."])
]
flaky = (
"pull / linux-focal-py3.7-gcc7 / test (distributed, 1, 2, linux.2xlarge)"
)
flaky = "pull / linux-focal-cuda11.8-py3.10-gcc9 / test (distributed, 1, 3, linux.8xlarge.nvidia.gpu)"
broken_trunk = (
"pull / linux-bionic-py3_7-clang8-xla / test (xla, 1, 1, linux.4xlarge)"
"pull / linux-focal-py3.11-clang10 / test (dynamo, 1, 2, linux.2xlarge)"
)

pr = GitHubPR("pytorch", "pytorch", 92863)
pr = GitHubPR("pytorch", "pytorch", 109584)
checks = pr.get_checkrun_conclusions()

# No broken trunk or flaky rules, then all failures are ignored when ic is used
# No broken trunk or flaky as the merge base is not set, these failures are
# counted as ignore current when ic is used
checks = get_classifications(
pr.pr_num,
pr.project,
checks,
pr.last_commit()["oid"],
None,
[],
[broken_trunk, flaky],
)
self.assertTrue(checks[flaky].classification == "IGNORE_CURRENT_CHECK")
self.assertTrue(checks[broken_trunk].classification == "IGNORE_CURRENT_CHECK")
_, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=2
)
self.assertTrue(len(failed) == 0)
_, failed, ignorable = categorize_checks(checks, list(checks.keys()))
self.assertTrue(len(failed) == 4)
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 2)
self.assertTrue(len(ignorable["FLAKY"]) == 0)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 0)
Expand All @@ -874,46 +872,22 @@ def test_ignore_current(self, *args: Any) -> None:
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
flaky_rules,
[broken_trunk, flaky],
)
self.assertTrue(checks[flaky].classification == "FLAKY")
self.assertTrue(checks[broken_trunk].classification == "BROKEN_TRUNK")
_, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=2
)
_, failed, ignorable = categorize_checks(checks, list(checks.keys()))
self.assertTrue(len(failed) == 0)
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 0)
self.assertTrue(len(ignorable["FLAKY"]) == 1)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)

# Broken trunk takes precedence over ignore current (no flaky rule is set here)
checks = get_classifications(
pr.pr_num,
pr.project,
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
[],
[broken_trunk, flaky],
)
self.assertTrue(checks[flaky].classification == "IGNORE_CURRENT_CHECK")
self.assertTrue(checks[broken_trunk].classification == "BROKEN_TRUNK")
_, failed, ignorable = categorize_checks(
checks, list(checks.keys()), ok_failed_checks_threshold=2
)
self.assertTrue(len(failed) == 0)
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 1)
self.assertTrue(len(ignorable["FLAKY"]) == 0)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
self.assertTrue(len(ignorable["FLAKY"]) == 2)
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 4)

@mock.patch("trymerge.read_flaky_rules", side_effect=xla_is_flaky_rules)
@mock.patch("trymerge.read_merge_rules", side_effect=xla_merge_rules)
def test_dont_ignore_flaky_failures(self, *args: Any) -> None:
"""
Regression test for https://github.com/pytorch/test-infra/issues/4126
"""
pr = GitHubPR("pytorch", "pytorch", 100369)
pr = GitHubPR("pytorch", "pytorch", 105312)
repo = DummyGitRepo()
# Check that failure is classified as flaky but still raises exception
with warnings.catch_warnings(record=True) as w, self.assertRaises(RuntimeError):
Expand Down
48 changes: 5 additions & 43 deletions .github/scripts/trymerge.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,6 @@ def __init__(self, name: str, url: str, status: Optional[str]):
self.jobs: JobNameToStateDict = {}


class FlakyRule:
def __init__(self, name: str, captures: List[str]):
self.name = re.compile(name)
self.captures = [re.compile(r) for r in captures]

def matches(self, job: Optional[Dict[str, Any]]) -> bool:
return (
job is not None
and self.name.search(job.get("name", "")) is not None
and job.get("failure_captures") is not None
and all(
any(
r.search(capture) is not None
for capture in job.get("failure_captures", [])
)
for r in self.captures
)
)

def __repr__(self) -> str:
return f"FlakyRule[name='{self.name}', captures={self.captures}]"


GH_PR_REVIEWS_FRAGMENT = """
fragment PRReviews on PullRequestReviewConnection {
nodes {
Expand Down Expand Up @@ -1261,13 +1238,6 @@ def read_merge_rules(
return [MergeRule(**x) for x in rc]


@lru_cache(maxsize=None)
def read_flaky_rules() -> List[FlakyRule]:
# NOTE: This is currently hardcoded, can be extended to do per repo rules
FLAKY_RULES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/flaky-rules.json"
return _get_flaky_rules(FLAKY_RULES_URL)


def find_matching_merge_rule(
pr: GitHubPR,
repo: Optional[GitRepo] = None,
Expand Down Expand Up @@ -1298,7 +1268,6 @@ def find_matching_merge_rule(
reject_reason = f"No rule found to match PR. Please [report]{issue_link} this issue to DevX team."

rules = read_merge_rules(repo, pr.org, pr.project)
flaky_rules = read_flaky_rules()
if not rules:
reject_reason = f"Rejecting the merge as no rules are defined for the repository in {MERGE_RULE_PATH}"
raise RuntimeError(reject_reason)
Expand All @@ -1318,7 +1287,6 @@ def find_matching_merge_rule(
checks,
pr.last_commit()["oid"],
base_rev,
flaky_rules,
ignore_current_checks=ignore_current_checks,
)

Expand Down Expand Up @@ -1469,11 +1437,6 @@ def checks_to_markdown_bullets(
]


@retries_decorator(rc=[])
def _get_flaky_rules(url: str) -> List[FlakyRule]:
return [FlakyRule(**rule) for rule in gh_fetch_json_list(url)]


@retries_decorator()
def save_merge_record(
collection: str,
Expand Down Expand Up @@ -1620,7 +1583,6 @@ def is_broken_trunk(

def is_flaky(
head_job: Optional[Dict[str, Any]],
flaky_rules: List[FlakyRule],
drci_classifications: Any,
) -> bool:
if not head_job or not drci_classifications:
Expand All @@ -1630,7 +1592,7 @@ def is_flaky(
return any(
head_job["name"] == flaky["name"]
for flaky in drci_classifications.get("FLAKY", [])
) or any(rule.matches(head_job) for rule in flaky_rules)
)


def get_classifications(
Expand All @@ -1639,7 +1601,6 @@ def get_classifications(
checks: Dict[str, JobCheckState],
head_sha: str,
merge_base: Optional[str],
flaky_rules: List[FlakyRule],
ignore_current_checks: Optional[List[str]],
) -> Dict[str, JobCheckState]:
# Group by job name without shard id and suffix to correctly identify broken
Expand Down Expand Up @@ -1716,6 +1677,9 @@ def insert(
name_no_suffix = remove_job_name_suffix(name)
head_sha_job = head_sha_jobs.get(name_no_suffix, {}).get(name)

# NB: It's important to note that when it comes to ghstack and broken trunk classification,
# the current implementation of trymerge uses the base of the current PR in the stack, i.e.
# gh/user/xx/base, while Dr.CI uses the base of the whole stack. Both works though
if is_broken_trunk(head_sha_job, merge_base_jobs.get(name_no_suffix)):
checks_with_classifications[name] = JobCheckState(
check.name,
Expand All @@ -1727,7 +1691,7 @@ def insert(
)
continue

elif is_flaky(head_sha_job, flaky_rules, drci_classifications):
elif is_flaky(head_sha_job, drci_classifications):
checks_with_classifications[name] = JobCheckState(
check.name, check.url, check.status, "FLAKY", check.job_id, check.title
)
Expand Down Expand Up @@ -2019,7 +1983,6 @@ def merge(
start_time = time.time()
last_exception = ""
elapsed_time = 0.0
flaky_rules = read_flaky_rules()
ignore_current_checks = [
x[0] for x in ignore_current_checks_info
] # convert to List[str] for convenience
Expand Down Expand Up @@ -2057,7 +2020,6 @@ def merge(
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
flaky_rules,
ignore_current_checks=ignore_current_checks,
)
pending, failing, _ = categorize_checks(
Expand Down

0 comments on commit 81a7445

Please sign in to comment.