Skip to content

Commit

Permalink
Feat: Add link checker to CI workflow (withastro#476)
Browse files Browse the repository at this point in the history
* Feat: Add link checker to CI workflow

* Fix line number, add location to message

* Prevent annotating partial href matches

* Improve annotation summary, get ready for PR

* Wrap overly long line

* Minor improvements

Co-authored-by: Chris Swithinbank <[email protected]>
  • Loading branch information
hippotastic and delucis authored May 7, 2022
1 parent 2241877 commit 1a25683
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 16 deletions.
30 changes: 17 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,35 @@ on:
push:
branches: [main]
pull_request:
branches: [main]
branches: [main]

# Automatically cancel in-progress actions on the same branch
concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request_target' && github.head_ref || github.ref }}
cancel-in-progress: true

jobs:
lint:
slugcheck:
name: Check for Mismatched Slugs
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Setup PNPM
uses: pnpm/[email protected]
- name: Install Tools & Dependencies
uses: ./.github/actions/install

- name: Setup Node.js 16.x
uses: actions/setup-node@v2
with:
node-version: 16.x
cache: 'pnpm'
- name: Run Check
run: pnpm run lint:slugcheck
linkcheck:
name: Check Links
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Install Dependencies
run: pnpm install
- name: Install Tools & Dependencies
uses: ./.github/actions/install

- name: Check for mismatched slugs
run: pnpm run lint:slugcheck
- name: Run Check
run: pnpm run lint:linkcheck
118 changes: 115 additions & 3 deletions scripts/lint-linkcheck.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ import path from 'path';
import fs from 'fs';
import kleur from 'kleur';
import htmlparser2 from 'htmlparser2';
import core from '@actions/core';

/**
* Contains all link checking logic.
*/
class BrokenLinkChecker {
constructor ({ baseUrl, buildOutputDir }) {
constructor ({ baseUrl, buildOutputDir, pageSourceDir }) {
this.baseUrl = baseUrl;
this.buildOutputDir = buildOutputDir;
this.pageSourceDir = pageSourceDir;
}

/**
Expand All @@ -26,8 +28,19 @@ class BrokenLinkChecker {
// Find all broken links
const brokenLinks = this.findBrokenLinks(pages);

// Output the result
// Output the result to the console
this.outputResult(brokenLinks);

if (brokenLinks.length > 0) {
// If we're being run by a GitHub CI workflow, try to output annotations
// that show the locations of the broken links in the source files
if (process.env.CI) {
this.outputSourceFileAnnotations(brokenLinks);
}

// Let our caller know that we found errors
process.exitCode = 1;
}
}

/**
Expand Down Expand Up @@ -129,6 +142,7 @@ class BrokenLinkChecker {
brokenLinks.push({
page,
href: url.href,
unresolvedHref: linkHref,
isMissingPage,
isMissingHash,
});
Expand Down Expand Up @@ -162,7 +176,7 @@ class BrokenLinkChecker {
console.log();

const summary = [
`*** Found ${totalBroken} broken ${totalBroken === 1 ? 'link' : 'links'} in total:`,
`*** Found ${totalBroken} broken ${totalBroken === 1 ? 'link' : 'links'} in build output:`,
` ${prefixPage} ${brokenPageCount} broken page ${brokenPageCount === 1 ? 'link' : 'links'}`,
` ${prefixHash} ${brokenHashCount} broken fragment ${brokenHashCount === 1 ? 'link' : 'links'}`,
];
Expand All @@ -173,6 +187,61 @@ class BrokenLinkChecker {
console.log();
}

outputSourceFileAnnotations (brokenLinks) {
const annotations = [];

// Collect all unique pathnames that had broken links
const pathnames = new Set(brokenLinks.map(brokenLink => brokenLink.page.pathname));

// Go through the collected pathnames
pathnames.forEach(pathname => {
// Try to find the Markdown source file for the current pathname
let sourceFilePath = this.tryFindSourceFileForPathname(pathname);

// If we could not find the source file, we can't create annotations for it
if (!sourceFilePath)
return;

// Load the source file
sourceFilePath = sourceFilePath.replace(/\\/g, '/');
const sourceFileContents = fs.readFileSync(sourceFilePath, 'utf8');
const lines = sourceFileContents.split(/\r?\n/);

// Try to locate all broken links in the source file and output error annotations
// including line and column numbers
const brokenLinksOnCurrentPage = brokenLinks
.filter(brokenLink => brokenLink.page.pathname === pathname);
lines.forEach((line, idx) => {
const lineNumber = idx + 1;
brokenLinksOnCurrentPage.forEach(brokenLink => {
const startColumn = this.indexOfHref(line, brokenLink.unresolvedHref);
if (startColumn === -1)
return;

const message = `Broken ${brokenLink.isMissingHash ? 'fragment' : 'page'} ` +
`link in ${sourceFilePath}, line ${lineNumber}: ${brokenLink.href}`;
annotations.push({
message,
location: {
file: sourceFilePath,
startLine: lineNumber,
startColumn,
endColumn: startColumn + brokenLink.unresolvedHref.length
}
});
});
});
});

// Always output a summary first because GitHub only displays the first 10 annotations
const totalLines = annotations.length;
core.error(`Found ${totalLines} ${totalLines === 1 ? 'line' : 'lines'} containing ` +
`broken links in Markdown sources, please check changed files view`);

// Now output all line annotations
annotations.forEach(annotation => core.error(annotation.message, annotation.location));
}

pathnameToHref (pathname) {
const url = new URL(pathname, this.baseUrl);
return url.href;
Expand All @@ -181,12 +250,55 @@ class BrokenLinkChecker {
pathnameToHtmlFilePath (pathname) {
return path.join(this.buildOutputDir, pathname, 'index.html');
}

/**
* Attempts to find a Markdown source file for the given `pathname`.
*
* Example: Given a pathname of `/en/some-page` or `/en/some-page/`,
* searches for the source file in the following locations
* and returns the first matching path:
* - `${this.pageSourceDir}/en/some-page.md`
* - `${this.pageSourceDir}/en/some-page/index.md`
*
* If no existing file is found, returns `undefined`.
*/
tryFindSourceFileForPathname (pathname) {
const possibleSourceFilePaths = [
path.join(this.pageSourceDir, pathname, '.') + '.md',
path.join(this.pageSourceDir, pathname, 'index.md'),
];
return possibleSourceFilePaths.find(possiblePath => fs.existsSync(possiblePath));
}

/**
* Attempts to find the given link `href` inside `input` and returns its index on a match.
*
* Prevents false positive partial matches (like an href of `/en/install` matching
* an input containing `/en/install/auto`) by requiring the characters surrounding a match
* not to be a part of URLs in Markdown.
*/
indexOfHref (input, href) {
let i = input.indexOf(href);
while (i !== -1) {
// Get the characters surrounding the current match (if any)
let charBefore = input[i - 1] || '';
let charAfter = input[i + href.length] || '';
// If both characters are not a part of URLs in Markdown,
// we have a proper (non-partial) match, so return the index
if ((charBefore + charAfter).match(/^[\s"'()[\],.]*$/))
return i;
// Otherwise, keep searching for other matches
i = input.indexOf(href, i + 1);
}
return -1;
}
}

// Check the build output for broken links; the page source directory is used
// to locate the Markdown files when creating annotations
const linkCheckerOptions = {
	baseUrl: 'https://docs.astro.build',
	buildOutputDir: './dist',
	pageSourceDir: './src/pages',
};
const brokenLinkChecker = new BrokenLinkChecker(linkCheckerOptions);

brokenLinkChecker.run();

0 comments on commit 1a25683

Please sign in to comment.