Skip to content

Commit

Permalink
chore(build): separate markdown and html handling into two separate stages (#1675)
Browse files Browse the repository at this point in the history
  • Loading branch information
necauqua authored Jan 7, 2025
1 parent b7a945e commit c90dbac
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 34 deletions.
5 changes: 3 additions & 2 deletions quartz/bootstrap-worker.mjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env node
import workerpool from "workerpool"
const cacheFile = "./.quartz-cache/transpiled-worker.mjs"
const { parseFiles } = await import(cacheFile)
const { parseMarkdown, processHtml } = await import(cacheFile)
workerpool.worker({
parseFiles,
parseMarkdown,
processHtml,
})
8 changes: 5 additions & 3 deletions quartz/plugins/vfile.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { Node, Parent } from "hast"
import { Root as HtmlRoot } from "hast"
import { Root as MdRoot } from "mdast"
import { Data, VFile } from "vfile"

export type QuartzPluginData = Data
export type ProcessedContent = [Node, VFile]
export type MarkdownContent = [MdRoot, VFile]
export type ProcessedContent = [HtmlRoot, VFile]

export function defaultProcessedContent(vfileData: Partial<QuartzPluginData>): ProcessedContent {
const root: Parent = { type: "root", children: [] }
const root: HtmlRoot = { type: "root", children: [] }
const vfile = new VFile("")
vfile.data = vfileData
return [root, vfile]
Expand Down
86 changes: 63 additions & 23 deletions quartz/processors/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@ import remarkRehype from "remark-rehype"
import { Processor, unified } from "unified"
import { Root as MDRoot } from "remark-parse/lib"
import { Root as HTMLRoot } from "hast"
import { ProcessedContent } from "../plugins/vfile"
import { MarkdownContent, ProcessedContent } from "../plugins/vfile"
import { PerfTimer } from "../util/perf"
import { read } from "to-vfile"
import { FilePath, QUARTZ, slugifyFilePath } from "../util/path"
import { FilePath, FullSlug, QUARTZ, slugifyFilePath } from "../util/path"
import path from "path"
import workerpool, { Promise as WorkerPromise } from "workerpool"
import { QuartzLogger } from "../util/log"
import { trace } from "../util/trace"
import { BuildCtx } from "../util/ctx"

export type QuartzProcessor = Processor<MDRoot, MDRoot, HTMLRoot>
export function createProcessor(ctx: BuildCtx): QuartzProcessor {
export type QuartzMdProcessor = Processor<MDRoot, MDRoot, MDRoot>
export type QuartzHtmlProcessor = Processor<undefined, MDRoot, HTMLRoot>

export function createMdProcessor(ctx: BuildCtx): QuartzMdProcessor {
const transformers = ctx.cfg.plugins.transformers

return (
Expand All @@ -24,14 +26,20 @@ export function createProcessor(ctx: BuildCtx): QuartzProcessor {
.use(remarkParse)
// MD AST -> MD AST transforms
.use(
transformers
.filter((p) => p.markdownPlugins)
.flatMap((plugin) => plugin.markdownPlugins!(ctx)),
)
transformers.flatMap((plugin) => plugin.markdownPlugins?.(ctx) ?? []),
) as unknown as QuartzMdProcessor
// ^ sadly the typing of `use` is not smart enough to infer the correct type from our plugin list
)
}

/**
 * Builds the unified pipeline for the HTML stage of the build.
 *
 * The pipeline has no parser of its own: it converts a markdown AST into an
 * HTML AST via `remarkRehype` and then applies the `htmlPlugins` of every
 * configured transformer.
 *
 * Each transformer's `htmlPlugins` are registered exactly once. Transformers
 * that do not define `htmlPlugins` contribute nothing.
 *
 * @param ctx - build context; `ctx.cfg.plugins.transformers` supplies the plugins
 * @returns the configured markdown-AST -> HTML-AST processor
 */
export function createHtmlProcessor(ctx: BuildCtx): QuartzHtmlProcessor {
  const transformers = ctx.cfg.plugins.transformers
  return (
    unified()
      // MD AST -> HTML AST
      .use(remarkRehype, { allowDangerousHtml: true })
      // HTML AST -> HTML AST transforms
      // (optional call + `?? []` skips transformers without html plugins,
      // replacing the older filter + non-null assertion form)
      .use(transformers.flatMap((plugin) => plugin.htmlPlugins?.(ctx) ?? []))
  )
}

Expand Down Expand Up @@ -75,8 +83,8 @@ async function transpileWorkerScript() {

export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {
const { argv, cfg } = ctx
return async (processor: QuartzProcessor) => {
const res: ProcessedContent[] = []
return async (processor: QuartzMdProcessor) => {
const res: MarkdownContent[] = []
for (const fp of fps) {
try {
const perf = new PerfTimer()
Expand All @@ -100,10 +108,32 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {
res.push([newAst, file])

if (argv.verbose) {
console.log(`[process] ${fp} -> ${file.data.slug} (${perf.timeSince()})`)
console.log(`[markdown] ${fp} -> ${file.data.slug} (${perf.timeSince()})`)
}
} catch (err) {
trace(`\nFailed to process \`${fp}\``, err as Error)
trace(`\nFailed to process markdown \`${fp}\``, err as Error)
}
}

return res
}
}

export function createMarkdownParser(ctx: BuildCtx, mdContent: MarkdownContent[]) {
return async (processor: QuartzHtmlProcessor) => {
const res: ProcessedContent[] = []
for (const [ast, file] of mdContent) {
try {
const perf = new PerfTimer()

const newAst = await processor.run(ast as MDRoot, file)
res.push([newAst, file])

if (ctx.argv.verbose) {
console.log(`[html] ${file.data.slug} (${perf.timeSince()})`)
}
} catch (err) {
trace(`\nFailed to process html \`${file.data.filePath}\``, err as Error)
}
}

Expand All @@ -113,6 +143,7 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {

/**
 * Rounds `num` to the nearest integer and constrains the result to the
 * range [`min`, `max`].
 *
 * The upper bound is applied last, so if `min` is greater than `max` the
 * result is `max`.
 */
const clamp = (num: number, min: number, max: number) => {
  const rounded = Math.round(num)
  const lowerBounded = Math.max(rounded, min)
  return Math.min(lowerBounded, max)
}

export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<ProcessedContent[]> {
const { argv } = ctx
const perf = new PerfTimer()
Expand All @@ -126,9 +157,8 @@ export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<Pro
log.start(`Parsing input files using ${concurrency} threads`)
if (concurrency === 1) {
try {
const processor = createProcessor(ctx)
const parse = createFileParser(ctx, fps)
res = await parse(processor)
const mdRes = await createFileParser(ctx, fps)(createMdProcessor(ctx))
res = await createMarkdownParser(ctx, mdRes)(createHtmlProcessor(ctx))
} catch (error) {
log.end()
throw error
Expand All @@ -140,17 +170,27 @@ export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<Pro
maxWorkers: concurrency,
workerType: "thread",
})
const errorHandler = (err: any) => {
console.error(`${err}`.replace(/^error:\s*/i, ""))
process.exit(1)
}

const childPromises: WorkerPromise<ProcessedContent[]>[] = []
const mdPromises: WorkerPromise<[MarkdownContent[], FullSlug[]]>[] = []
for (const chunk of chunks(fps, CHUNK_SIZE)) {
childPromises.push(pool.exec("parseFiles", [ctx.buildId, argv, chunk, ctx.allSlugs]))
mdPromises.push(pool.exec("parseMarkdown", [ctx.buildId, argv, chunk]))
}
const mdResults: [MarkdownContent[], FullSlug[]][] =
await WorkerPromise.all(mdPromises).catch(errorHandler)

const childPromises: WorkerPromise<ProcessedContent[]>[] = []
for (const [_, extraSlugs] of mdResults) {
ctx.allSlugs.push(...extraSlugs)
}
for (const [mdChunk, _] of mdResults) {
childPromises.push(pool.exec("processHtml", [ctx.buildId, argv, mdChunk, ctx.allSlugs]))
}
const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch(errorHandler)

const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch((err) => {
const errString = err.toString().slice("Error:".length)
console.error(errString)
process.exit(1)
})
res = results.flat()
await pool.terminate()
}
Expand Down
35 changes: 29 additions & 6 deletions quartz/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,46 @@ sourceMapSupport.install(options)
import cfg from "../quartz.config"
import { Argv, BuildCtx } from "./util/ctx"
import { FilePath, FullSlug } from "./util/path"
import { createFileParser, createProcessor } from "./processors/parse"
import {
createFileParser,
createHtmlProcessor,
createMarkdownParser,
createMdProcessor,
} from "./processors/parse"
import { options } from "./util/sourcemap"
import { MarkdownContent, ProcessedContent } from "./plugins/vfile"

// only called from worker thread
export async function parseFiles(
export async function parseMarkdown(
  buildId: string,
  argv: Argv,
  fps: FilePath[],
): Promise<[MarkdownContent[], FullSlug[]]> {
  // Markdown stage of the worker: runs the markdown processor over `fps` and
  // resolves to the parsed markdown content together with the slug list below.
  //
  // this is a hack:
  // markdown plugins are assumed to possibly append to `allSlugs` without
  // reading from it, so the worker starts from an empty list and returns
  // whatever ended up in it next to the parse result
  const allSlugs: FullSlug[] = []
  const ctx: BuildCtx = { buildId, cfg, argv, allSlugs }

  const parse = createFileParser(ctx, fps)
  const mdProcessor = createMdProcessor(ctx)
  const parsed = await parse(mdProcessor)
  return [parsed, allSlugs]
}

// only called from worker thread
export function processHtml(
buildId: string,
argv: Argv,
mds: MarkdownContent[],
allSlugs: FullSlug[],
) {
): Promise<ProcessedContent[]> {
const ctx: BuildCtx = {
buildId,
cfg,
argv,
allSlugs,
}
const processor = createProcessor(ctx)
const parse = createFileParser(ctx, fps)
return parse(processor)
return createMarkdownParser(ctx, mds)(createHtmlProcessor(ctx))
}

0 comments on commit c90dbac

Please sign in to comment.