ci: Speed up QA tests a bit (semgrep#5526)

* ci: Clean up public repos list
* ci: Refactor test_semgrep_rules
* Restore public repo cache only for public repo tests
* Unify pipenv versions
* Remove build-core dependency from test-core
QPC-github · Jun 14, 2022 · 4ef3646 · 4ef3646
1 parent 6660bc9
commit 4ef3646
Show file tree

Hide file tree

Showing 9 changed files with 63 additions and 78 deletions.
diff --git a/.github/workflows/benchmark-parameterized.yml b/.github/workflows/benchmark-parameterized.yml
@@ -43,7 +43,7 @@ jobs:
         run: |
           cd semgrep
           export PATH=/github/home/.local/bin:$PATH
-          pip3 install pipenv==2021.5.29
+          pip3 install pipenv==2022.6.7
           pipenv install --dev
       - name: Run benchmark
         run: |

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -44,7 +44,7 @@ jobs:
         run: |
           cd semgrep
           export PATH=/github/home/.local/bin:$PATH
-          pip3 install pipenv==2021.5.29
+          pip3 install pipenv==2022.6.7
           pipenv install --dev
       - name: Run benchmark
         run: |

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -139,7 +139,7 @@ jobs:
     container: returntocorp/ocaml:alpine-2022-06-09
     steps:
       - name: Install pipenv
-        run: sudo pip install pipenv
+        run: sudo pip install pipenv==2022.6.7
       - name: Pre-checkout fixes
         run: |
           sudo chmod -R 777 /github
@@ -169,7 +169,7 @@ jobs:
     runs-on: macos-10.15
     steps:
       - name: Install pipenv
-        run: sudo python3 -m pip install pipenv
+        run: sudo python3 -m pip install pipenv==2022.6.7
       - name: Make checkout speedy
         run: git config --global fetch.parallel 50
       - name: Checkout

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -45,7 +45,7 @@ jobs:
           submodules: recursive
       - name: Install pipenv
         if: steps.core-cache.outputs.cache-hit != 'true'
-        run: sudo pip install pipenv
+        run: sudo pip install pipenv==2022.6.7
       - name: Cache OCaml build files
         if: steps.core-cache.outputs.cache-hit != 'true'
         uses: actions/cache@v3
@@ -154,13 +154,13 @@ jobs:
           cache-to: type=gha,dest=/tmp/.buildx-cache,mode=max
 
   test-core:
+    # TODO: use the artifact of build-core in this job
     name: test semgrep-core
     runs-on: ubuntu-latest
     container: returntocorp/ocaml:alpine-2022-06-09
-    needs: [build-core] # save some CPU time by waiting till build cache is populated in that job
     steps:
       - name: Install pipenv
-        run: sudo pip install pipenv
+        run: sudo pip install pipenv==2022.6.7
       - name: Pre-checkout fixes
         run: |
           sudo chmod -R 777 /github
@@ -203,8 +203,7 @@ jobs:
           curl --fail -L -X POST "https://dashboard.semgrep.dev/api/metric/semgrep.core.test-run-time-seconds.num" -d "$TEST_RUN_TIME"
       - name: Report Number of Tests Stats
         if: github.ref == 'refs/heads/develop'
-        run: |
-          ./semgrep-core/tests/report_test_metrics.sh
+        run: ./semgrep-core/tests/report_test_metrics.sh
 
       # TODO: move this to a stable host for more reliable results.
       #
@@ -232,7 +231,7 @@ jobs:
         python: ["3.7", "3.8", "3.9", "3.10", "3.11.0-beta - 3.11.0"]
     steps:
       - name: Install pipenv
-        run: pipx install pipenv==2022.5.2
+        run: pipx install pipenv==2022.6.7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Fetch semgrep-cli submodules
@@ -303,7 +302,7 @@ jobs:
         test_name: ["public_repos", "semgrep_rules_repo"]
     steps:
       - name: Install pipenv
-        run: pipx install pipenv==2022.5.2
+        run: pipx install pipenv==2022.6.7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Fetch semgrep-cli submodules
@@ -327,19 +326,20 @@ jobs:
           export PATH=/github/home/.local/bin:$PATH
           pipenv install --dev
       - uses: actions/cache@v3
+        if: matrix.test_name == 'public_repos'
         with:
-          path: ~/.cache/semgrep-cache
-          key: semgrep-repo-cache-test
+          path: ~/.cache/qa-public-repos
+          key: qa-public-repos-${{ hashFiles('semgrep/tests/qa/*public_repos*') }}
       - run: |
-          mkdir -p ~/.cache/semgrep-cache
-          touch ~/.cache/semgrep-cache/ok
+          mkdir -p ~/.cache/qa-public-repos
+          touch ~/.cache/qa-public-repos/ok
       - name: Test semgrep
         working-directory: semgrep
         run: |
           export PATH=/github/home/.local/bin:$PATH
           pipenv run pytest -n auto -vv --tb=short --durations=0 tests/qa/test_${{ matrix.test_name }}.py
         env:
-          QA_TESTS_CACHE_PATH: ~/.cache/semgrep-cache
+          QA_TESTS_CACHE_PATH: ~/.cache/qa-public-repos
 
   # Run abbreviated version of benchmarks to check that they work
   benchmarks-lite:
@@ -348,7 +348,7 @@ jobs:
     needs: [build-core]
     steps:
       - name: Install pipenv
-        run: pipx install pipenv==2022.5.2
+        run: pipx install pipenv==2022.6.7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Fetch semgrep-cli submodules
@@ -384,7 +384,7 @@ jobs:
     needs: [build-core]
     steps:
       - name: Install pipenv
-        run: pipx install pipenv==2022.5.2
+        run: pipx install pipenv==2022.6.7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Fetch semgrep-cli submodules

diff --git a/Dockerfile b/Dockerfile
@@ -22,7 +22,7 @@ USER root
 # for ocaml-pcre now used in semgrep-core
 # TODO: update root image to include python 3.9
 RUN apk add --no-cache pcre-dev python3 &&\
-     pip install --no-cache-dir pipenv==2021.11.23
+     pip install --no-cache-dir pipenv==2022.6.7
 
 USER user
 

diff --git a/semgrep-core/tests/dockerfile/parsing/semgrep.dockerfile b/semgrep-core/tests/dockerfile/parsing/semgrep.dockerfile
@@ -22,7 +22,7 @@ USER root
 # for ocaml-pcre now used in semgrep-core
 # TODO: update root image to include python 3.9
 RUN apk add --update --no-cache pcre-dev python3
-RUN pip install --no-cache-dir pipenv==2021.11.23
+RUN pip install --no-cache-dir pipenv==2022.6.7
 
 USER user
 WORKDIR /home/user

diff --git a/semgrep/semgrep/config_resolver.py b/semgrep/semgrep/config_resolver.py
@@ -378,7 +378,10 @@ def validate_single_rule(
 def manual_config(
     pattern: str, lang: str, replacement: Optional[str]
 ) -> Dict[str, YamlTree]:
-    # TODO remove when using sgrep -e ... -l ... instead of this hacked config
+    """Create a fake rule when we only have a pattern and language
+
+    This is used when someone calls `semgrep scan -e print -l py`
+    """
     pattern_span = Span.from_string(pattern, filename="CLI Input")
     pattern_tree = YamlTree[str](value=pattern, span=pattern_span)
     error_span = Span.from_string(

diff --git a/semgrep/tests/qa/public_repos.py b/semgrep/tests/qa/public_repos.py
@@ -17,9 +17,8 @@ def as_param(self):
         return pytest.param(
             self,
             id=self.short_url,
-            marks=pytest.mark.xfail(xfail_reason=self.xfail_reason, strict=True)
-            if self.xfail_reason is not None
-            else [],
+            # we haven't looked at these xfails for a year so might as well just skip them in CI
+            marks=pytest.mark.skip if self.xfail_reason is not None else [],
         )
 
 
@@ -109,16 +108,6 @@ def as_param(self):
     Repo("https://github.com/dropbox/whitegold"),
     Repo("https://github.com/dropbox/ykfipsconf"),
     Repo("https://github.com/dropbox/zinger"),
-    Repo("https://github.com/returntocorp/badwords"),
-    Repo("https://github.com/returntocorp/bento-report"),
-    Repo("https://github.com/returntocorp/check-docs"),
-    Repo("https://github.com/returntocorp/cli", xfail_reason="unknown"),
-    Repo("https://github.com/returntocorp/flake8-click"),
-    Repo("https://github.com/returntocorp/flake8-flask"),
-    Repo("https://github.com/returntocorp/flake8-requests"),
-    Repo("https://github.com/returntocorp/inputset-generator"),
-    Repo("https://github.com/returntocorp/semgrep-action"),
-    Repo("https://github.com/returntocorp/semgrep-rules", xfail_reason="unknown"),
     Repo("https://github.com/seemoo-lab/opendrop"),
     Repo("https://github.com/lightstep/lightstep-tracer-python"),
     Repo("https://github.com/draios/sysdig-inspect"),

diff --git a/semgrep/tests/qa/test_semgrep_rules_repo.py b/semgrep/tests/qa/test_semgrep_rules_repo.py
@@ -1,68 +1,61 @@
 import shutil
 import subprocess
-import sys
 
 import pytest
+from click.testing import CliRunner
 
-
-def _fail_subprocess_on_error(cmd):
-    output = subprocess.run(
-        cmd,
-        capture_output=True,
-        encoding="utf-8",
-    )
-
-    if output.returncode != 0:
-        pytest.fail(f"Failed running cmd={cmd}" + output.stdout + output.stderr)
+from semgrep.cli import cli
 
 
-@pytest.mark.slow
-def test_semgrep_rules_repo(run_semgrep_in_tmp):
+@pytest.fixture(scope="session", autouse=True)
+def in_semgrep_rules_repo(tmpdir_factory):
+    monkeypatch = pytest.MonkeyPatch()
+    repo_dir = tmpdir_factory.mktemp("semgrep-rules")
     subprocess.check_output(
-        ["git", "clone", "--depth=1", "https://github.com/returntocorp/semgrep-rules"]
-    )
-
-    # Remove subdir that doesnt contain rules
-    shutil.rmtree("./semgrep-rules/stats")
-
-    _fail_subprocess_on_error(
         [
-            sys.executable,
-            "-m",
-            "semgrep",
-            "--generate-config",
-            "--disable-version-check",
-            "--metrics",
-            "off",
+            "git",
+            "clone",
+            "--depth=1",
+            "https://github.com/returntocorp/semgrep-rules",
+            repo_dir,
         ]
     )
+    # Remove subdir that doesnt contain rules
+    shutil.rmtree(repo_dir / "stats")
+    monkeypatch.chdir(repo_dir)
+    yield
+    monkeypatch.undo()
+
 
-    _fail_subprocess_on_error(
+@pytest.mark.slow
+def test_semgrep_rules_repo__test(in_semgrep_rules_repo):
+    runner = CliRunner()
+    results = runner.invoke(
+        cli,
         [
-            sys.executable,
-            "-m",
-            "semgrep",
             "--disable-version-check",
-            "--metrics",
-            "off",
-            "--dangerously-allow-arbitrary-code-execution-from-rules",
+            "--metrics=off",
             "--strict",
             "--test",
             "--test-ignore-todo",
-            "semgrep-rules",
         ],
     )
+    print(results.output)
+    assert results.exit_code == 0
+
 
-    _fail_subprocess_on_error(
+@pytest.mark.slow
+def test_semgrep_rules_repo__validate(in_semgrep_rules_repo):
+    runner = CliRunner()
+    results = runner.invoke(
+        cli,
         [
-            sys.executable,
-            "-m",
-            "semgrep",
             "--disable-version-check",
-            "--metrics",
-            "off",
+            "--metrics=off",
+            "--strict",
             "--validate",
-            "--config",
-            "semgrep-rules",
-        ]
+            "--config=.",
+        ],
     )
+    print(results.output)
+    assert results.exit_code == 0