Skip to content

Commit

Permalink
fix(build): Separate markdown and html handling into stages
Browse files Browse the repository at this point in the history
First run all markdown transformers on all files, and then run all
html transformers on all files.

This lets a markdown processor augment `allSlugs`, so that alias slugs
taken from frontmatter can be added and later used by the linker HTML
processor when it transforms links.

Making this work with workers required a very cringe hack.
Without it, building the `allSlugs` list correctly (from frontmatter as
well as from filenames) would, I feel, require a complete overhaul of the
build process.
  • Loading branch information
necauqua committed Dec 27, 2024
1 parent a582505 commit f28ba5b
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 38 deletions.
5 changes: 3 additions & 2 deletions quartz/bootstrap-worker.mjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env node
import workerpool from "workerpool"
const cacheFile = "./.quartz-cache/transpiled-worker.mjs"
const { parseFiles } = await import(cacheFile)
const { parseMarkdown, processHtml } = await import(cacheFile)
workerpool.worker({
parseFiles,
parseMarkdown,
processHtml,
})
17 changes: 14 additions & 3 deletions quartz/plugins/types.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,37 @@
import { PluggableList } from "unified"
import { StaticResources } from "../util/resources"
import { ProcessedContent } from "./vfile"
import { QuartzComponent } from "../components/types"
import { FilePath } from "../util/path"
import { BuildCtx } from "../util/ctx"
import DepGraph from "../depgraph"
import { Node, Plugin, PluginTuple, Settings } from "unified/lib"
import { Root as HtmlRoot } from "hast"
import { Root as MdRoot } from "mdast"

export interface PluginTypes {
transformers: QuartzTransformerPluginInstance[]
filters: QuartzFilterPluginInstance[]
emitters: QuartzEmitterPluginInstance[]
}

// Variant of the upstream PluggableList, built from the Plugin, PluginTuple and
// Settings types imported from "unified/lib", in which the input and output
// node types are type parameters instead of being left untyped.
// I is the node type the plugins take as input, O the node type they output;
// both default to the generic Node.
type PluggableList<I extends Node = Node, O extends Node = Node> = Array<
Plugin<Array<any>, I, O> | PluginTuple<Array<any>, I, O> | Preset<I, O>
>
// A preset bundles an optional nested plugin list with optional settings.
// It refers back to PluggableList, so the two aliases are mutually recursive.
type Preset<I extends Node = Node, O extends Node = Node> = {
plugins?: PluggableList<I, O> | undefined
settings?: Settings | undefined
}

type OptionType = object | undefined
export type QuartzTransformerPlugin<Options extends OptionType = undefined> = (
opts?: Options,
) => QuartzTransformerPluginInstance
export type QuartzTransformerPluginInstance = {
name: string
textTransform?: (ctx: BuildCtx, src: string | Buffer) => string | Buffer
markdownPlugins?: (ctx: BuildCtx) => PluggableList
htmlPlugins?: (ctx: BuildCtx) => PluggableList
markdownPlugins?: (ctx: BuildCtx) => PluggableList<MdRoot, MdRoot>
htmlPlugins?: (ctx: BuildCtx) => PluggableList<HtmlRoot, HtmlRoot>
externalResources?: (ctx: BuildCtx) => Partial<StaticResources>
}

Expand Down
8 changes: 5 additions & 3 deletions quartz/plugins/vfile.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { Node, Parent } from "hast"
import { Root as HtmlRoot, Parent } from "hast"
import { Root as MdRoot } from "mdast"
import { Data, VFile } from "vfile"

export type QuartzPluginData = Data
export type ProcessedContent = [Node, VFile]
export type MarkdownContent = [MdRoot, VFile]
export type ProcessedContent = [HtmlRoot, VFile]

export function defaultProcessedContent(vfileData: Partial<QuartzPluginData>): ProcessedContent {
const root: Parent = { type: "root", children: [] }
const root = { type: "root" as const, children: [] }
const vfile = new VFile("")
vfile.data = vfileData
return [root, vfile]
Expand Down
95 changes: 71 additions & 24 deletions quartz/processors/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@ import remarkRehype from "remark-rehype"
import { Processor, unified } from "unified"
import { Root as MDRoot } from "remark-parse/lib"
import { Root as HTMLRoot } from "hast"
import { ProcessedContent } from "../plugins/vfile"
import { MarkdownContent, ProcessedContent } from "../plugins/vfile"
import { PerfTimer } from "../util/perf"
import { read } from "to-vfile"
import { FilePath, QUARTZ, slugifyFilePath } from "../util/path"
import { FilePath, FullSlug, QUARTZ, slugifyFilePath } from "../util/path"
import path from "path"
import workerpool, { Promise as WorkerPromise } from "workerpool"
import { QuartzLogger } from "../util/log"
import { trace } from "../util/trace"
import { BuildCtx } from "../util/ctx"

export type QuartzProcessor = Processor<MDRoot, MDRoot, HTMLRoot>
export function createProcessor(ctx: BuildCtx): QuartzProcessor {
// Processor type for the markdown stage, as returned by createMdProcessor
// (remark-parse followed by the transformers' markdownPlugins).
export type QuartzMdProcessor = Processor<MDRoot, MDRoot, MDRoot>
// Processor type for the HTML stage, as returned by createHtmlProcessor
// (remark-rehype followed by the transformers' htmlPlugins).
// NOTE(review): the leading `undefined` presumably reflects that this processor
// has no parser of its own and is only driven through `run` — confirm against unified's typings.
export type QuartzHtmlProcessor = Processor<undefined, MDRoot, HTMLRoot>

export function createMdProcessor(ctx: BuildCtx): QuartzMdProcessor {
const transformers = ctx.cfg.plugins.transformers

return (
Expand All @@ -24,14 +26,20 @@ export function createProcessor(ctx: BuildCtx): QuartzProcessor {
.use(remarkParse)
// MD AST -> MD AST transforms
.use(
transformers
.filter((p) => p.markdownPlugins)
.flatMap((plugin) => plugin.markdownPlugins!(ctx)),
)
transformers.flatMap((plugin) => plugin.markdownPlugins?.(ctx) ?? []),
) as unknown as QuartzMdProcessor
// ^ sadly the typing of `use` is not smart enough to infer the correct type from our plugin list
)
}

export function createHtmlProcessor(ctx: BuildCtx): QuartzHtmlProcessor {
const transformers = ctx.cfg.plugins.transformers
return (
unified()
// MD AST -> HTML AST
.use(remarkRehype, { allowDangerousHtml: true })
// HTML AST -> HTML AST transforms
.use(transformers.filter((p) => p.htmlPlugins).flatMap((plugin) => plugin.htmlPlugins!(ctx)))
.use(transformers.flatMap((plugin) => plugin.htmlPlugins?.(ctx) ?? []))
)
}

Expand Down Expand Up @@ -73,10 +81,13 @@ async function transpileWorkerScript() {
})
}

export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {
export function createFileParser(
ctx: BuildCtx,
fps: FilePath[],
): (processor: QuartzMdProcessor) => Promise<MarkdownContent[]> {
const { argv, cfg } = ctx
return async (processor: QuartzProcessor) => {
const res: ProcessedContent[] = []
return async (processor) => {
const res: MarkdownContent[] = []
for (const fp of fps) {
try {
const perf = new PerfTimer()
Expand All @@ -100,10 +111,35 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {
res.push([newAst, file])

if (argv.verbose) {
console.log(`[process] ${fp} -> ${file.data.slug} (${perf.timeSince()})`)
console.log(`[markdown] ${fp} -> ${file.data.slug} (${perf.timeSince()})`)
}
} catch (err) {
trace(`\nFailed to process \`${fp}\``, err as Error)
trace(`\nFailed to process markdown \`${fp}\``, err as Error)
}
}

return res
}
}

export function createMarkdownParser(
ctx: BuildCtx,
mdContent: MarkdownContent[],
): (processor: QuartzHtmlProcessor) => Promise<ProcessedContent[]> {
return async (processor) => {
const res: ProcessedContent[] = []
for (const [ast, file] of mdContent) {
try {
const perf = new PerfTimer()

const newAst = await processor.run(ast as MDRoot, file)
res.push([newAst, file])

if (ctx.argv.verbose) {
console.log(`[html] ${file.data.slug} (${perf.timeSince()})`)
}
} catch (err) {
trace(`\nFailed to process html \`${file.data.filePath}\``, err as Error)
}
}

Expand All @@ -113,6 +149,7 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {

/**
 * Rounds `num` to the nearest integer and then confines it to `[min, max]`.
 * The lower bound is applied first and the upper bound last, so `max` wins
 * whenever the two bounds conflict.
 */
const clamp = (num: number, min: number, max: number) => {
  const rounded = Math.round(num)
  const lowerBounded = Math.max(rounded, min)
  return Math.min(lowerBounded, max)
}

export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<ProcessedContent[]> {
const { argv } = ctx
const perf = new PerfTimer()
Expand All @@ -126,9 +163,8 @@ export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<Pro
log.start(`Parsing input files using ${concurrency} threads`)
if (concurrency === 1) {
try {
const processor = createProcessor(ctx)
const parse = createFileParser(ctx, fps)
res = await parse(processor)
const mdRes = await createFileParser(ctx, fps)(createMdProcessor(ctx))
res = await createMarkdownParser(ctx, mdRes)(createHtmlProcessor(ctx))
} catch (error) {
log.end()
throw error
Expand All @@ -140,17 +176,28 @@ export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<Pro
maxWorkers: concurrency,
workerType: "thread",
})
const errorHandler = (err: any) => {
const errString = err.toString().slice("Error:".length)
console.error(errString)
process.exit(1)
}

const childPromises: WorkerPromise<ProcessedContent[]>[] = []
const mdPromises: WorkerPromise<[MarkdownContent[], FullSlug[]]>[] = []
for (const chunk of chunks(fps, CHUNK_SIZE)) {
childPromises.push(pool.exec("parseFiles", [ctx.buildId, argv, chunk, ctx.allSlugs]))
mdPromises.push(pool.exec("parseMarkdown", [ctx.buildId, argv, chunk]))
}
const mdResults: [MarkdownContent[], FullSlug[]][] =
await WorkerPromise.all(mdPromises).catch(errorHandler)

const childPromises: WorkerPromise<ProcessedContent[]>[] = []
for (const [_, extraSlugs] of mdResults) {
ctx.allSlugs.push(...extraSlugs)
}
for (const [mdChunk, _] of mdResults) {
childPromises.push(pool.exec("processHtml", [ctx.buildId, argv, mdChunk, ctx.allSlugs]))
}
const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch(errorHandler)

const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch((err) => {
const errString = err.toString().slice("Error:".length)
console.error(errString)
process.exit(1)
})
res = results.flat()
await pool.terminate()
}
Expand Down
35 changes: 29 additions & 6 deletions quartz/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,46 @@ sourceMapSupport.install(options)
import cfg from "../quartz.config"
import { Argv, BuildCtx } from "./util/ctx"
import { FilePath, FullSlug } from "./util/path"
import { createFileParser, createProcessor } from "./processors/parse"
import {
createFileParser,
createHtmlProcessor,
createMarkdownParser,
createMdProcessor,
} from "./processors/parse"
import { options } from "./util/sourcemap"
import { MarkdownContent, ProcessedContent } from "./plugins/vfile"

// only called from worker thread
export async function parseFiles(
export async function parseMarkdown(
buildId: string,
argv: Argv,
fps: FilePath[],
): Promise<[MarkdownContent[], FullSlug[]]> {
// this is a hack:
// `allSlugs` starts out empty in this worker, so we assume markdown plugins
// only ever add to it and never read from it
const allSlugs: FullSlug[] = []
const ctx: BuildCtx = {
buildId,
cfg,
argv,
allSlugs,
}
return [await createFileParser(ctx, fps)(createMdProcessor(ctx)), allSlugs]
}

// only called from worker thread
export function processHtml(
buildId: string,
argv: Argv,
mds: MarkdownContent[],
allSlugs: FullSlug[],
) {
): Promise<ProcessedContent[]> {
const ctx: BuildCtx = {
buildId,
cfg,
argv,
allSlugs,
}
const processor = createProcessor(ctx)
const parse = createFileParser(ctx, fps)
return parse(processor)
return createMarkdownParser(ctx, mds)(createHtmlProcessor(ctx))
}

0 comments on commit f28ba5b

Please sign in to comment.