feat: add personas (woodruffw#226)

Zeitsperre · Dec 3, 2024 · 7644555 · 7644555
1 parent 8c8fecb
commit 7644555
Show file tree

Hide file tree

Showing 23 changed files with 383 additions and 108 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -19,8 +19,9 @@ anyhow = "1.0.93"
 clap = { version = "4.5.21", features = ["derive", "env"] }
 clap-verbosity-flag = "3.0.0"
 env_logger = "0.11.5"
-github-actions-models = "0.11.0"
+github-actions-models = "0.12.0"
 human-panic = "2.0.1"
+indexmap = "2.7.0"
 indicatif = "0.17.9"
 itertools = "0.13.0"
 log = "0.4.22"

diff --git a/docs/snippets/help.txt b/docs/snippets/help.txt
@@ -3,34 +3,69 @@ Static analysis for GitHub Actions
 Usage: zizmor [OPTIONS] <INPUTS>...
 
 Arguments:
-  <INPUTS>...  The workflow filenames or directories to audit
+  <INPUTS>...
+          The workflow filenames or directories to audit
 
 Options:
   -p, --pedantic
-          Emit findings even when the context suggests an explicit security decision made by the user
+          Emit 'pedantic' findings.
+
+          This is an alias for --persona=pedantic.
+
+      --persona <PERSONA>
+          The persona to use while auditing
+
+          [default: regular]
+
+          Possible values:
+          - auditor:  The "auditor" persona (false positives OK)
+          - pedantic: The "pedantic" persona (code smells OK)
+          - regular:  The "regular" persona (minimal false positives)
+
   -o, --offline
           Only perform audits that don't require network access
+
   -v, --verbose...
           Increase logging verbosity
+
   -q, --quiet...
           Decrease logging verbosity
+
   -n, --no-progress
           Disable the progress bar. This is useful primarily when running with a high verbosity level, as the two will fight for stderr
+
       --gh-token <GH_TOKEN>
-          The GitHub API token to use [env: GH_TOKEN=]
+          The GitHub API token to use
+
+          [env: GH_TOKEN=]
+
       --format <FORMAT>
-          The output format to emit. By default, plain text will be emitted [possible values: plain, json, sarif]
+          The output format to emit. By default, plain text will be emitted
+
+          [default: plain]
+          [possible values: plain, json, sarif]
+
   -c, --config <CONFIG>
           The configuration file to load. By default, any config will be discovered relative to $CWD
+
       --no-config
           Disable all configuration loading
+
       --no-exit-codes
           Disable all error codes besides success and tool failure
+
       --min-severity <MIN_SEVERITY>
-          Filter all results below this severity [possible values: unknown, informational, low, medium, high]
+          Filter all results below this severity
+
+          [possible values: unknown, informational, low, medium, high]
+
       --min-confidence <MIN_CONFIDENCE>
-          Filter all results below this confidence [possible values: unknown, low, medium, high]
+          Filter all results below this confidence
+
+          [possible values: unknown, low, medium, high]
+
   -h, --help
-          Print help
+          Print help (see a summary with '-h')
+
   -V, --version
           Print version
diff --git a/docs/usage.md b/docs/usage.md
@@ -69,6 +69,94 @@ See [Integration](#integration) for suggestions on when to use each format.
 
 All other exit codes are currently reserved.
 
+## Using personas
+
+!!! tip
+
+    `--persona=...` is available in `v0.7.0` and later.
+
+`zizmor` comes with three pre-defined "personas," which dictate how
+sensitive `zizmor`'s analyses are:
+
+* The _regular persona_: the user wants high-signal, low-noise, actionable
+  security findings. This persona is best for ordinary local use as well as use
+  in most CI/CD setups, which is why it's the default.
+
+    !!! note
+
+        This persona can be made explicit with `--persona=regular`,
+        although this is not required.
+
+
+* The _pedantic persona_, enabled by `--persona=pedantic`: the user wants
+  *code smells* in addition to regular, actionable security findings.
+
+    This persona is ideal for finding things that are a good idea
+    to clean up or resolve, but are likely not immediately actionable
+    security findings (or are actionable, but indicate a intentional
+    security decision by the workflow author).
+
+    For example, using the pedantic persona will flag the following
+    with an `unpinned-uses` finding, since it uses a symbolic reference
+    as its pin instead of a hashed pin:
+
+    ```yaml
+    uses: actions/checkout@v3
+    ```
+
+    produces:
+
+    ```console
+    $ zizmor --pedantic tests/test-data/unpinned-uses.yml
+    help[unpinned-uses]: unpinned action reference
+      --> tests/test-data/unpinned-uses.yml:14:9
+       |
+    14 |       - uses: actions/checkout@v3
+       |         ------------------------- help: action is not pinned to a hash ref
+       |
+       = note: audit confidence → High
+    ```
+
+    !!! tip
+
+        This persona can also be enabled with `--pedantic`, which is
+        an alias for `--persona=pedantic`.
+
+* The _auditor persona_, enabled by `--persona=auditor`: the user wants
+  *everything* flagged by `zizmor`, including findings that are likely
+  to be false positives.
+
+    This persona is ideal for security auditors and code reviewers, who
+    want to go through `zizmor`'s findings manually with a fine-toothed comb.
+
+    Some audits, notably `self-hosted-runner`, *only* produce auditor-level
+    results. This is because these audits require runtime context that `zizmor`
+    lacks access to by design, meaning that their results are always
+    subject to false positives.
+
+    For example, with the default persona:
+
+    ```console
+    $ zizmor tests/test-data/self-hosted.yml
+    🌈 completed self-hosted.yml
+    No findings to report. Good job! (1 suppressed)
+    ```
+
+    and with `--persona=auditor`:
+
+    ```console
+    $ zizmor --persona=auditor tests/test-data/self-hosted.yml
+    note[self-hosted-runner]: runs on a self-hosted runner
+      --> tests/test-data/self-hosted.yml:8:5
+        |
+      8 |     runs-on: [self-hosted, my-ubuntu-box]
+        |     ------------------------------------- note: self-hosted runner used here
+        |
+        = note: audit confidence → High
+
+      1 finding: 1 unknown, 0 informational, 0 low, 0 medium, 0 high
+    ```
+
 ## Filtering results
 
 There are two straightforward ways to filter `zizmor`'s results:

diff --git a/src/audit/artipacked.rs b/src/audit/artipacked.rs
@@ -9,14 +9,12 @@ use itertools::Itertools;
 
 use super::{audit_meta, WorkflowAudit};
 use crate::{
-    finding::{Confidence, Finding, Severity},
+    finding::{Confidence, Finding, Persona, Severity},
     state::AuditState,
 };
 use crate::{models::Workflow, utils::split_patterns};
 
-pub(crate) struct Artipacked {
-    pub(crate) state: AuditState,
-}
+pub(crate) struct Artipacked;
 
 audit_meta!(
     Artipacked,
@@ -47,8 +45,8 @@ impl Artipacked {
 }
 
 impl WorkflowAudit for Artipacked {
-    fn new(state: AuditState) -> Result<Self> {
-        Ok(Self { state })
+    fn new(_state: AuditState) -> Result<Self> {
+        Ok(Self)
     }
 
     fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
@@ -76,15 +74,11 @@ impl WorkflowAudit for Artipacked {
                         Some(EnvValue::Boolean(true)) => {
                             // If a user explicitly sets `persist-credentials: true`,
                             // they probably mean it. Only report if being pedantic.
-                            if self.state.pedantic {
-                                vulnerable_checkouts.push(step)
-                            } else {
-                                continue;
-                            }
+                            vulnerable_checkouts.push((step, Persona::Pedantic))
                         }
                         // TODO: handle expressions and literal strings here.
                         // persist-credentials is true by default.
-                        _ => vulnerable_checkouts.push(step),
+                        _ => vulnerable_checkouts.push((step, Persona::default())),
                     }
                 } else if uses.starts_with("actions/upload-artifact") {
                     let Some(EnvValue::String(path)) = with.get("path") else {
@@ -102,11 +96,12 @@ impl WorkflowAudit for Artipacked {
             if vulnerable_uploads.is_empty() {
                 // If we have no vulnerable uploads, then emit lower-confidence
                 // findings for just the checkout steps.
-                for checkout in vulnerable_checkouts {
+                for (checkout, persona) in vulnerable_checkouts {
                     findings.push(
                         Self::finding()
                             .severity(Severity::Medium)
                             .confidence(Confidence::Low)
+                            .persona(persona)
                             .add_location(
                                 checkout
                                     .location()
@@ -119,7 +114,7 @@ impl WorkflowAudit for Artipacked {
                 // Select only pairs where the vulnerable checkout precedes the
                 // vulnerable upload. There are more efficient ways to do this than
                 // a cartesian product, but this way is simple.
-                for (checkout, upload) in vulnerable_checkouts
+                for ((checkout, persona), upload) in vulnerable_checkouts
                     .into_iter()
                     .cartesian_product(vulnerable_uploads.into_iter())
                 {
@@ -128,6 +123,7 @@ impl WorkflowAudit for Artipacked {
                             Self::finding()
                                 .severity(Severity::High)
                                 .confidence(Confidence::High)
+                                .persona(persona)
                                 .add_location(
                                     checkout
                                         .location()

diff --git a/src/audit/github_env.rs b/src/audit/github_env.rs
@@ -172,7 +172,6 @@ mod tests {
             ("something | tee \"${$OTHER_ENV}\" # not $GITHUB_ENV", false),
         ] {
             let audit_state = AuditState {
-                pedantic: false,
                 offline: false,
                 gh_token: None,
                 caches: Caches::new(),

diff --git a/src/audit/self_hosted_runner.rs b/src/audit/self_hosted_runner.rs
@@ -2,11 +2,11 @@
 //! which are frequently unsafe to use in public repositories
 //! due to the potential for persistence between workflow runs.
 //!
-//! This audit is "pedantic" only, since zizmor can't detect
+//! This audit is "auditor" only, since zizmor can't detect
 //! whether self-hosted runners are ephemeral or not.
 
 use crate::{
-    finding::{Confidence, Severity},
+    finding::{Confidence, Persona, Severity},
     AuditState,
 };
 
@@ -18,9 +18,7 @@ use github_actions_models::{
 
 use super::{audit_meta, WorkflowAudit};
 
-pub(crate) struct SelfHostedRunner {
-    pub(crate) state: AuditState,
-}
+pub(crate) struct SelfHostedRunner;
 
 audit_meta!(
     SelfHostedRunner,
@@ -29,11 +27,11 @@ audit_meta!(
 );
 
 impl WorkflowAudit for SelfHostedRunner {
-    fn new(state: AuditState) -> anyhow::Result<Self>
+    fn new(_state: AuditState) -> anyhow::Result<Self>
     where
         Self: Sized,
     {
-        Ok(Self { state })
+        Ok(Self)
     }
 
     fn audit<'w>(
@@ -42,11 +40,6 @@ impl WorkflowAudit for SelfHostedRunner {
     ) -> Result<Vec<crate::finding::Finding<'w>>> {
         let mut results = vec![];
 
-        if !self.state.pedantic {
-            log::info!("skipping self-hosted runner checks");
-            return Ok(results);
-        }
-
         for job in workflow.jobs() {
             let Job::NormalJob(normal) = *job else {
                 continue;
@@ -66,6 +59,7 @@ impl WorkflowAudit for SelfHostedRunner {
                                 Self::finding()
                                     .confidence(Confidence::High)
                                     .severity(Severity::Unknown)
+                                    .persona(Persona::Auditor)
                                     .add_location(
                                         job.location()
                                             .with_keys(&["runs-on".into()])
@@ -82,6 +76,7 @@ impl WorkflowAudit for SelfHostedRunner {
                                 Self::finding()
                                     .confidence(Confidence::Low)
                                     .severity(Severity::Unknown)
+                                    .persona(Persona::Auditor)
                                     .add_location(
                                         job.location().with_keys(&["runs-on".into()]).annotated(
                                             "expression may expand into a self-hosted runner",
@@ -101,6 +96,7 @@ impl WorkflowAudit for SelfHostedRunner {
                     Self::finding()
                         .confidence(Confidence::Low)
                         .severity(Severity::Unknown)
+                        .persona(Persona::Auditor)
                         .add_location(
                             job.location()
                                 .with_keys(&["runs-on".into()])
@@ -114,6 +110,7 @@ impl WorkflowAudit for SelfHostedRunner {
                     Self::finding()
                         .confidence(Confidence::Low)
                         .severity(Severity::Unknown)
+                        .persona(Persona::Auditor)
                         .add_location(
                             job.location()
                                 .with_keys(&["runs-on".into()])