From 1172c1463e3692ea6b0a81b85e007476663195b8 Mon Sep 17 00:00:00 2001 From: Dave Abrahams Date: Thu, 17 Aug 2023 19:10:51 -0700 Subject: [PATCH] Documentation extraction and publishing (#836) --- .github/workflows/doc-extraction.yml | 141 ++ .jazzy.yml | 5 + Package.resolved | 18 + Package.swift | 1 + .../TestGeneratorPlugin.swift | 2 +- Tools/doc-index.html.gyb | 67 + Tools/gyb.py | 1262 +++++++++++++++++ 7 files changed, 1495 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/doc-extraction.yml create mode 100644 .jazzy.yml create mode 100644 Tools/doc-index.html.gyb create mode 100755 Tools/gyb.py diff --git a/.github/workflows/doc-extraction.yml b/.github/workflows/doc-extraction.yml new file mode 100644 index 000000000..acf38a75c --- /dev/null +++ b/.github/workflows/doc-extraction.yml @@ -0,0 +1,141 @@ +name: Extract And Publish Documentation + +defaults: + run: + shell: bash -eo pipefail {0} + +on: + push: + branches: [ main ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - run: git config --global core.autocrlf input + + - name: Setup swift + uses: swift-actions/setup-swift@v1 + with: + swift-version: 5.8 + + - run: swift --version + + - name: Setup LLVM + uses: KyleMayes/install-llvm-action@v1 + with: + version: "15.0" + + - run: llvm-config --version + + - name: Prepare Shell Environment + # The format of the ${GITHUB_ENV} file is extremely restrictive; it apparently only supports + # lines of the form: + # + # = + # + # And a multiline version + # (https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#multiline-strings). + # It is not interpreted directly by a shell, so any quotes or other special characters are + # taken literally. + + # FIXME: REF_URL_COMPONENT computation is probably wrong for some refs. + run: | + echo "PKG_CONFIG_PATH=$PWD + REPO_SANS_OWNER=${GITHUB_REPOSITORY##*/} + REF_URL_COMPONENT=${GITHUB_REF##*/} + EXTRACTION_TARGETS=$( + swift package dump-package | + jq '.targets | map(select(.type | test("^(regular|executable)$"))) | .[].name' | + xargs + ) + " >> "${GITHUB_ENV}" + + - name: Generate LLVM pkgconfig file + run: | + swift package resolve + .build/checkouts/Swifty-LLVM/Tools/make-pkgconfig.sh llvm.pc + cat llvm.pc + + - name: Extract with DocC + run: | + export PKG_CONFIG_PATH + for TARGET in ${EXTRACTION_TARGETS}; do + mkdir -p _site/docc/"$TARGET" + swift package --allow-writing-to-directory ./_site \ + generate-documentation \ + --target "$TARGET" \ + --output-path _site/docc/"${TARGET}" \ + --experimental-documentation-coverage --level brief \ + --enable-inherited-docs \ + --transform-for-static-hosting \ + --hosting-base-path "${REPO_SANS_OWNER}/docc/${TARGET}" \ + --source-service github \ + --source-service-base-url "https://github.com/${GITHUB_REPOSITORY}/blob/${REF_URL_COMPONENT}" \ + --checkout-path "$(pwd)" + done + + - name: Extract with Jazzy + run: | + export PKG_CONFIG_PATH + gem install jazzy + for TARGET in ${EXTRACTION_TARGETS}; do + mkdir -p _site/jazzy/"$TARGET" + jazzy \ + --source-host-files-url "https://github.com/${GITHUB_REPOSITORY}/blob/${REF_URL_COMPONENT}" \ + --module "$TARGET" \ + --module-version "${{ github.event.release.tag_name }}" \ + --copyright "© $(date '+%Y') The Hylo Authors. (Last updated: $(date '+%Y-%m-%d'))" \ + --config .jazzy.yml \ + --output _site/jazzy/"$TARGET" \ + --min-acl private + done + + - name: Generate Index Page + run: | + Tools/gyb.py \ + --line-directive '' \ + -DROOT_URL="https://hylo-lang.org/${REPO_SANS_OWNER}" \ + -DEXTRACTION_TARGETS="${EXTRACTION_TARGETS}" \ + -DGITHUB_REPOSITORY="${GITHUB_REPOSITORY}" \ + Tools/doc-index.html.gyb -o _site/index.html + + - name: Setup Pages + uses: actions/configure-pages@v3 + + - name: Fix permissions + run: | + chmod -v -R +rX "_site/" | while read line; do + echo "::warning title=Invalid file permissions automatically fixed::$line" + done + + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.jazzy.yml b/.jazzy.yml new file mode 100644 index 000000000..ac60c42a4 --- /dev/null +++ b/.jazzy.yml @@ -0,0 +1,5 @@ +module: Hylo +author: Dimi Racordon +theme: fullwidth +author_url: "https://hylo-lang.dev" +github_url: "https://github.com/hylo-lang" diff --git a/Package.resolved b/Package.resolved index 43b1be7dc..a19da3a35 100644 --- a/Package.resolved +++ b/Package.resolved @@ -45,6 +45,24 @@ "version" : "1.0.4" } }, + { + "identity" : "swift-docc-plugin", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-docc-plugin.git", + "state" : { + "revision" : "26ac5758409154cc448d7ab82389c520fa8a8247", + "version" : "1.3.0" + } + }, + { + "identity" : "swift-docc-symbolkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-docc-symbolkit", + "state" : { + "revision" : "b45d1f2ed151d057b54504d653e0da5552844e34", + "version" : "1.0.0" + } + }, { "identity" : "swift-format", "kind" : "remoteSourceControl", diff --git a/Package.swift b/Package.swift index 450b19542..8fa9fbfb7 100644 --- a/Package.swift +++ b/Package.swift @@ -36,6 +36,7 @@ let package = Package( .package( url: "https://github.com/apple/swift-format", from: "508.0.1"), + .package(url: "https://github.com/apple/swift-docc-plugin.git", from: "1.1.0"), .package( url: "https://github.com/SwiftPackageIndex/SPIManifest.git", from: "0.12.0"), diff --git a/Plugins/TestGeneratorPlugin/TestGeneratorPlugin.swift b/Plugins/TestGeneratorPlugin/TestGeneratorPlugin.swift index e1eca8c5b..daecabf9c 100644 --- a/Plugins/TestGeneratorPlugin/TestGeneratorPlugin.swift +++ b/Plugins/TestGeneratorPlugin/TestGeneratorPlugin.swift @@ -11,7 +11,7 @@ struct TestGeneratorPlugin: BuildToolPlugin { let outputPath = context.pluginWorkDirectory.appending("HyloFileTests.swift") let cmd = Command.buildCommand( - displayName: "Generating XCTestCases for \(inputPaths.map(\.stem)) into \(outputPath)", + displayName: "Generating XCTestCases into \(outputPath)", executable: try context.tool(named: "GenerateHyloFileTests").path, arguments: ["-o", outputPath, "-n", target.moduleName] + inputPaths, inputFiles: inputPaths, diff --git a/Tools/doc-index.html.gyb b/Tools/doc-index.html.gyb new file mode 100644 index 000000000..66e7b494e --- /dev/null +++ b/Tools/doc-index.html.gyb @@ -0,0 +1,67 @@ + + + + + + + + + +${GITHUB_REPOSITORY} Implementation Documentation + + + + + + + + + + + + + + + + + + +
+
+ + Logo + + +

The Hylo Programming Language

+ + +

Language tour

+

Roadmap

+

Community discussion

+ + + +
+
+

${GITHUB_REPOSITORY} Implementation Documentation

+

None of the available documentation extraction tools can merge and cross-link + documentation from different targets, so they are presented separately and each has its own + link.

+

Targets

+
    +% for m in EXTRACTION_TARGETS.split(" "): +
  • ${m} (docc, jazzy)
  • +% end +
+
+
+ +

Hosted on GitHub Pages — Theme by orderedlist

+
+ + + + diff --git a/Tools/gyb.py b/Tools/gyb.py new file mode 100755 index 000000000..520d1dec3 --- /dev/null +++ b/Tools/gyb.py @@ -0,0 +1,1262 @@ +#!/usr/bin/env python3 +# GYB: Generate Your Boilerplate (improved names welcome; at least +# this one's short). See -h output for instructions + +import io +import os +import re +import sys +import textwrap +import tokenize +from bisect import bisect +from io import StringIO + + +def get_line_starts(s): + """Return a list containing the start index of each line in s. + + The list also contains a sentinel index for the end of the string, + so there will be one more element in the list than there are lines + in the string + """ + starts = [0] + + for line in s.split('\n'): + starts.append(starts[-1] + len(line) + 1) + + starts[-1] -= 1 + return starts + + +def strip_trailing_nl(s): + """If s ends with a newline, drop it; else return s intact""" + return s[:-1] if s.endswith('\n') else s + + +def split_lines(s): + """Split s into a list of lines, each of which has a trailing newline + + If the lines are later concatenated, the result is s, possibly + with a single appended newline. + """ + return [line + '\n' for line in s.split('\n')] + + +# text on a line up to the first '$$', '${', or '%%' +literalText = r'(?: [^$\n%] | \$(?![${]) | %(?!%) )*' + +# The part of an '%end' line that follows the '%' sign +linesClose = r'[\ \t]* end [\ \t]* (?: \# .* )? $' + +# Note: Where "# Absorb" appears below, the regexp attempts to eat up +# through the end of ${...} and %{...}% constructs. In reality we +# handle this with the Python tokenizer, which avoids mis-detections +# due to nesting, comments and strings. This extra absorption in the +# regexp facilitates testing the regexp on its own, by preventing the +# interior of some of these constructs from being treated as literal +# text. +tokenize_re = re.compile( + r''' +# %-lines and %{...}-blocks + # \n? # absorb one preceding newline + ^ + (?: + (?P + (?P<_indent> [\ \t]* % (?! [{%] ) [\ \t]* ) (?! [\ \t] | ''' + + linesClose + r''' ) .* + ( \n (?P=_indent) (?! ''' + linesClose + r''' ) .* ) * + ) + | (?P [\ \t]* % [ \t]* ''' + linesClose + r''' ) + | [\ \t]* (?P %\{ ) + (?: [^}]| \} (?!%) )* \}% # Absorb + ) + \n? # absorb one trailing newline + +# Substitutions +| (?P \$\{ ) + [^}]* \} # Absorb + +# %% and $$ are literal % and $ respectively +| (?P[$%]) (?P=symbol) + +# Literal text +| (?P ''' + literalText + r''' + (?: + # newline that doesn't precede space+% + (?: \n (?! [\ \t]* %[^%] ) ) + ''' + literalText + r''' + )* + \n? + ) +''', re.VERBOSE | re.MULTILINE) + +gyb_block_close = re.compile(r'\}%[ \t]*\n?') + + +def token_pos_to_index(token_pos, start, line_starts): + """Translate a tokenize (line, column) pair into an absolute + position in source text given the position where we started + tokenizing and a list that maps lines onto their starting + character indexes. + """ + relative_token_line_plus1, token_col = token_pos + + # line number where we started tokenizing + start_line_num = bisect(line_starts, start) - 1 + + # line number of the token in the whole text + abs_token_line = relative_token_line_plus1 - 1 + start_line_num + + # if found in the first line, adjust the end column to account + # for the extra text + if relative_token_line_plus1 == 1: + token_col += start - line_starts[start_line_num] + + # Sometimes tokenizer errors report a line beyond the last one + if abs_token_line >= len(line_starts): + return line_starts[-1] + + return line_starts[abs_token_line] + token_col + + +def tokenize_python_to_unmatched_close_curly(source_text, start, line_starts): + """Apply Python's tokenize to source_text starting at index start + while matching open and close curly braces. When an unmatched + close curly brace is found, return its index. If not found, + return len(source_text). If there's a tokenization error, return + the position of the error. + """ + stream = StringIO(source_text) + stream.seek(start) + nesting = 0 + + try: + for kind, text, token_start, token_end, line_text \ + in tokenize.generate_tokens(stream.readline): + + if text == '{': + nesting += 1 + elif text == '}': + nesting -= 1 + if nesting < 0: + return token_pos_to_index(token_start, start, line_starts) + + except tokenize.TokenError as error: + (message, error_pos) = error.args + return token_pos_to_index(error_pos, start, line_starts) + + return len(source_text) + + +def tokenize_template(template_text): + r"""Given the text of a template, returns an iterator over + (tokenType, token, match) tuples. + + **Note**: this is template syntax tokenization, not Python + tokenization. + + When a non-literal token is matched, a client may call + iter.send(pos) on the iterator to reset the position in + template_text at which scanning will resume. + + This function provides a base level of tokenization which is + then refined by ParseContext.token_generator. + + >>> from pprint import * + >>> pprint(list((kind, text) for kind, text, _ in tokenize_template( + ... '%for x in range(10):\n% print x\n%end\njuicebox'))) + [('gybLines', '%for x in range(10):\n% print x'), + ('gybLinesClose', '%end'), + ('literal', 'juicebox')] + + >>> pprint(list((kind, text) for kind, text, _ in tokenize_template( + ... '''Nothing + ... % if x: + ... % for i in range(3): + ... ${i} + ... % end + ... % else: + ... THIS SHOULD NOT APPEAR IN THE OUTPUT + ... '''))) + [('literal', 'Nothing\n'), + ('gybLines', '% if x:\n% for i in range(3):'), + ('substitutionOpen', '${'), + ('literal', '\n'), + ('gybLinesClose', '% end'), + ('gybLines', '% else:'), + ('literal', 'THIS SHOULD NOT APPEAR IN THE OUTPUT\n')] + + >>> for kind, text, _ in tokenize_template(''' + ... This is $some$ literal stuff containing a ${substitution} + ... followed by a %{...} block: + ... %{ + ... # Python code + ... }% + ... and here $${are} some %-lines: + ... % x = 1 + ... % y = 2 + ... % if z == 3: + ... % print '${hello}' + ... % end + ... % for x in zz: + ... % print x + ... % # different indentation + ... % twice + ... and some lines that literally start with a %% token + ... %% first line + ... %% second line + ... '''): + ... print((kind, text.strip().split('\n',1)[0])) + ('literal', 'This is $some$ literal stuff containing a') + ('substitutionOpen', '${') + ('literal', 'followed by a %{...} block:') + ('gybBlockOpen', '%{') + ('literal', 'and here ${are} some %-lines:') + ('gybLines', '% x = 1') + ('gybLinesClose', '% end') + ('gybLines', '% for x in zz:') + ('gybLines', '% # different indentation') + ('gybLines', '% twice') + ('literal', 'and some lines that literally start with a % token') + """ + pos = 0 + end = len(template_text) + + saved_literal = [] + literal_first_match = None + + while pos < end: + m = tokenize_re.match(template_text, pos, end) + + # pull out the one matched key (ignoring internal patterns starting + # with _) + ((kind, text), ) = ( + (kind, text) for (kind, text) in m.groupdict().items() + if text is not None and kind[0] != '_') + + if kind in ('literal', 'symbol'): + if len(saved_literal) == 0: + literal_first_match = m + # literals and symbols get batched together + saved_literal.append(text) + pos = None + else: + # found a non-literal. First yield any literal we've accumulated + if saved_literal != []: + yield 'literal', ''.join(saved_literal), literal_first_match + saved_literal = [] + + # Then yield the thing we found. If we get a reply, it's + # the place to resume tokenizing + pos = yield kind, text, m + + # If we were not sent a new position by our client, resume + # tokenizing at the end of this match. + if pos is None: + pos = m.end(0) + else: + # Client is not yet ready to process next token + yield + + if saved_literal != []: + yield 'literal', ''.join(saved_literal), literal_first_match + + +def split_gyb_lines(source_lines): + r"""Return a list of lines at which to split the incoming source + + These positions represent the beginnings of python line groups that + will require a matching %end construct if they are to be closed. + + >>> src = split_lines('''\ + ... if x: + ... print x + ... if y: # trailing comment + ... print z + ... if z: # another comment\ + ... ''') + >>> s = split_gyb_lines(src) + >>> len(s) + 2 + >>> src[s[0]] + ' print z\n' + >>> s[1] - len(src) + 0 + + >>> src = split_lines('''\ + ... if x: + ... if y: print 1 + ... if z: + ... print 2 + ... pass\ + ... ''') + >>> s = split_gyb_lines(src) + >>> len(s) + 1 + >>> src[s[0]] + ' if y: print 1\n' + + >>> src = split_lines('''\ + ... if x: + ... if y: + ... print 1 + ... print 2 + ... ''') + >>> s = split_gyb_lines(src) + >>> len(s) + 2 + >>> src[s[0]] + ' if y:\n' + >>> src[s[1]] + ' print 1\n' + """ + last_token_text, last_token_kind = None, None + unmatched_indents = [] + + dedents = 0 + try: + for token_kind, token_text, token_start, \ + (token_end_line, token_end_col), line_text \ + in tokenize.generate_tokens(lambda i=iter(source_lines): + next(i)): + + if token_kind in (tokenize.COMMENT, tokenize.ENDMARKER): + continue + + if token_text == '\n' and last_token_text == ':': + unmatched_indents.append(token_end_line) + + # The tokenizer appends dedents at EOF; don't consider + # those as matching indentations. Instead just save them + # up... + if last_token_kind == tokenize.DEDENT: + dedents += 1 + # And count them later, when we see something real. + if token_kind != tokenize.DEDENT and dedents > 0: + unmatched_indents = unmatched_indents[:-dedents] + dedents = 0 + + last_token_text, last_token_kind = token_text, token_kind + + except tokenize.TokenError: + # Let the later compile() call report the error + return [] + + if last_token_text == ':': + unmatched_indents.append(len(source_lines)) + + return unmatched_indents + + +def code_starts_with_dedent_keyword(source_lines): + r"""Return True iff the incoming Python source_lines begin with "else", + "elif", "except", or "finally". + + Initial comments and whitespace are ignored. + + >>> code_starts_with_dedent_keyword(split_lines('if x in y: pass')) + False + >>> code_starts_with_dedent_keyword(split_lines('except ifSomethingElse:')) + True + >>> code_starts_with_dedent_keyword( + ... split_lines('\n# comment\nelse: # yes')) + True + """ + token_text = None + for token_kind, token_text, _, _, _ \ + in tokenize.generate_tokens(lambda i=iter(source_lines): next(i)): + + if token_kind != tokenize.COMMENT and token_text.strip() != '': + break + + return token_text in ('else', 'elif', 'except', 'finally') + + +class ParseContext(object): + + """State carried through a parse of a template""" + + filename = '' + template = '' + line_starts = [] + code_start_line = -1 + code_text = None + tokens = None # The rest of the tokens + close_lines = False + + def __init__(self, filename, template=None): + self.filename = os.path.abspath(filename) + if sys.platform == 'win32': + self.filename = '/'.join(self.filename.split(os.sep)) + if template is None: + with io.open(os.path.normpath(filename), encoding='utf-8') as f: + self.template = f.read() + else: + self.template = template + self.line_starts = get_line_starts(self.template) + self.tokens = self.token_generator(tokenize_template(self.template)) + self.next_token() + + def pos_to_line(self, pos): + return bisect(self.line_starts, pos) - 1 + + def token_generator(self, base_tokens): + r"""Given an iterator over (kind, text, match) triples (see + tokenize_template above), return a refined iterator over + token_kinds. + + Among other adjustments to the elements found by base_tokens, + this refined iterator tokenizes python code embedded in + template text to help determine its true extent. The + expression "base_tokens.send(pos)" is used to reset the index at + which base_tokens resumes scanning the underlying text. + + >>> ctx = ParseContext('dummy', ''' + ... %for x in y: + ... % print x + ... % end + ... literally + ... ''') + >>> while ctx.token_kind: + ... print((ctx.token_kind, ctx.code_text or ctx.token_text)) + ... ignored = ctx.next_token() + ('literal', '\n') + ('gybLinesOpen', 'for x in y:\n') + ('gybLines', ' print x\n') + ('gybLinesClose', '% end') + ('literal', 'literally\n') + + >>> ctx = ParseContext('dummy', + ... '''Nothing + ... % if x: + ... % for i in range(3): + ... ${i} + ... % end + ... % else: + ... THIS SHOULD NOT APPEAR IN THE OUTPUT + ... ''') + >>> while ctx.token_kind: + ... print((ctx.token_kind, ctx.code_text or ctx.token_text)) + ... ignored = ctx.next_token() + ('literal', 'Nothing\n') + ('gybLinesOpen', 'if x:\n') + ('gybLinesOpen', ' for i in range(3):\n') + ('substitutionOpen', 'i') + ('literal', '\n') + ('gybLinesClose', '% end') + ('gybLinesOpen', 'else:\n') + ('literal', 'THIS SHOULD NOT APPEAR IN THE OUTPUT\n') + + >>> ctx = ParseContext('dummy', + ... '''% for x in [1, 2, 3]: + ... % if x == 1: + ... literal1 + ... % elif x > 1: # add output line here to fix bug + ... % if x == 2: + ... literal2 + ... % end + ... % end + ... % end + ... ''') + >>> while ctx.token_kind: + ... print((ctx.token_kind, ctx.code_text or ctx.token_text)) + ... ignored = ctx.next_token() + ('gybLinesOpen', 'for x in [1, 2, 3]:\n') + ('gybLinesOpen', ' if x == 1:\n') + ('literal', 'literal1\n') + ('gybLinesOpen', 'elif x > 1: # add output line here to fix bug\n') + ('gybLinesOpen', ' if x == 2:\n') + ('literal', 'literal2\n') + ('gybLinesClose', '% end') + ('gybLinesClose', '% end') + ('gybLinesClose', '% end') + """ + for self.token_kind, self.token_text, self.token_match in base_tokens: + kind = self.token_kind + self.code_text = None + + # Do we need to close the current lines? + self.close_lines = kind == 'gybLinesClose' + + # %{...}% and ${...} constructs + if kind.endswith('Open'): + + # Tokenize text that follows as Python up to an unmatched '}' + code_start = self.token_match.end(kind) + self.code_start_line = self.pos_to_line(code_start) + + close_pos = tokenize_python_to_unmatched_close_curly( + self.template, code_start, self.line_starts) + self.code_text = self.template[code_start:close_pos] + yield kind + + if (kind == 'gybBlockOpen'): + # Absorb any '}% \n' + m2 = gyb_block_close.match(self.template, close_pos) + if not m2: + raise ValueError("Invalid block closure") + next_pos = m2.end(0) + else: + assert kind == 'substitutionOpen' + # skip past the closing '}' + next_pos = close_pos + 1 + + # Resume tokenizing after the end of the code. + base_tokens.send(next_pos) + + elif kind == 'gybLines': + + self.code_start_line = self.pos_to_line( + self.token_match.start('gybLines')) + indentation = self.token_match.group('_indent') + + # Strip off the leading indentation and %-sign + source_lines = re.split( + '^' + re.escape(indentation), + self.token_match.group('gybLines') + '\n', + flags=re.MULTILINE)[1:] + + if code_starts_with_dedent_keyword(source_lines): + self.close_lines = True + + last_split = 0 + for line in split_gyb_lines(source_lines): + self.token_kind = 'gybLinesOpen' + self.code_text = ''.join(source_lines[last_split:line]) + yield self.token_kind + last_split = line + self.code_start_line += line - last_split + self.close_lines = False + + self.code_text = ''.join(source_lines[last_split:]) + if self.code_text: + self.token_kind = 'gybLines' + yield self.token_kind + else: + yield self.token_kind + + def next_token(self): + """Move to the next token""" + for kind in self.tokens: + return self.token_kind + + self.token_kind = None + + +_default_line_directive = \ + '// ###sourceLocation(file: "%(file)s", line: %(line)d)' + + +class ExecutionContext(object): + + """State we pass around during execution of a template""" + + def __init__(self, line_directive=_default_line_directive, + **local_bindings): + self.local_bindings = local_bindings + self.line_directive = line_directive + self.local_bindings['__context__'] = self + self.result_text = [] + self.last_file_line = None + + def append_text(self, text, file, line): + # see if we need to inject a line marker + if self.line_directive: + if (file, line) != self.last_file_line: + # We can only insert the line directive at a line break + if len(self.result_text) == 0 \ + or self.result_text[-1].endswith('\n'): + substitutions = {'file': file, 'line': line + 1} + format_str = self.line_directive + '\n' + self.result_text.append(format_str % substitutions) + # But if the new text contains any line breaks, we can create + # one + elif '\n' in text: + i = text.find('\n') + self.result_text.append(text[:i + 1]) + # and try again + self.append_text(text[i + 1:], file, line + 1) + return + + self.result_text.append(text) + self.last_file_line = (file, line + text.count('\n')) + + +class ASTNode(object): + + """Abstract base class for template AST nodes""" + + def __init__(self): + raise NotImplementedError("ASTNode.__init__ is not implemented.") + + def execute(self, context): + raise NotImplementedError("ASTNode.execute is not implemented.") + + def __str__(self, indent=''): + raise NotImplementedError("ASTNode.__str__ is not implemented.") + + def format_children(self, indent): + if not self.children: + return ' []' + + return '\n'.join( + ['', indent + '['] + + [x.__str__(indent + 4 * ' ') for x in self.children] + + [indent + ']']) + + +class Block(ASTNode): + + """A sequence of other AST nodes, to be executed in order""" + + children = [] + + def __init__(self, context): + self.children = [] + + while context.token_kind and not context.close_lines: + if context.token_kind == 'literal': + node = Literal + else: + node = Code + self.children.append(node(context)) + + def execute(self, context): + for x in self.children: + x.execute(context) + + def __str__(self, indent=''): + return indent + 'Block:' + self.format_children(indent) + + +class Literal(ASTNode): + + """An AST node that generates literal text""" + + def __init__(self, context): + self.text = context.token_text + start_position = context.token_match.start(context.token_kind) + self.start_line_number = context.pos_to_line(start_position) + self.filename = context.filename + context.next_token() + + def execute(self, context): + context.append_text(self.text, self.filename, self.start_line_number) + + def __str__(self, indent=''): + return '\n'.join( + [indent + x for x in ['Literal:'] + + strip_trailing_nl(self.text).split('\n')]) + + +class Code(ASTNode): + + """An AST node that is evaluated as Python""" + + code = None + children = () + kind = None + + def __init__(self, context): + + source = '' + source_line_count = 0 + + def accumulate_code(): + s = source + (context.code_start_line - source_line_count) * '\n' \ + + textwrap.dedent(context.code_text) + line_count = context.code_start_line + \ + context.code_text.count('\n') + context.next_token() + return s, line_count + + eval_exec = 'exec' + if context.token_kind.startswith('substitution'): + eval_exec = 'eval' + source, source_line_count = accumulate_code() + source = '(' + source.strip() + ')' + + else: + while context.token_kind == 'gybLinesOpen': + source, source_line_count = accumulate_code() + source += ' __children__[%d].execute(__context__)\n' % len( + self.children) + source_line_count += 1 + + self.children += (Block(context),) + + if context.token_kind == 'gybLinesClose': + context.next_token() + + if context.token_kind == 'gybLines': + source, source_line_count = accumulate_code() + + # Only handle a substitution as part of this code block if + # we don't already have some %-lines. + elif context.token_kind == 'gybBlockOpen': + + # Opening ${...} and %{...}% constructs + source, source_line_count = accumulate_code() + + self.filename = context.filename + self.start_line_number = context.code_start_line + self.code = compile(source, context.filename, eval_exec) + self.source = source + + def execute(self, context): + # Save __children__ from the local bindings + save_children = context.local_bindings.get('__children__') + # Execute the code with our __children__ in scope + context.local_bindings['__children__'] = self.children + context.local_bindings['__file__'] = self.filename + result = eval(self.code, context.local_bindings) + + if context.local_bindings['__children__'] is not self.children: + raise ValueError("The code is not allowed to mutate __children__") + # Restore the bindings + context.local_bindings['__children__'] = save_children + + # If we got a result, the code was an expression, so append + # its value + if result is not None \ + or (isinstance(result, str) and result != ''): + from numbers import Number, Integral + result_string = None + if isinstance(result, Number) and not isinstance(result, Integral): + result_string = repr(result) + elif isinstance(result, Integral) or isinstance(result, list): + result_string = str(result) + else: + result_string = result + context.append_text( + result_string, self.filename, self.start_line_number) + + def __str__(self, indent=''): + source_lines = re.sub(r'^\n', '', strip_trailing_nl( + self.source), flags=re.MULTILINE).split('\n') + if len(source_lines) == 1: + s = indent + 'Code: {' + source_lines[0] + '}' + else: + s = indent + 'Code:\n' + indent + '{\n' + '\n'.join( + indent + 4 * ' ' + line for line in source_lines + ) + '\n' + indent + '}' + return s + self.format_children(indent) + + +def expand(filename, line_directive=_default_line_directive, **local_bindings): + r"""Return the contents of the given template file, executed with the given + local bindings. + + >>> from tempfile import NamedTemporaryFile + >>> # On Windows, the name of a NamedTemporaryFile cannot be used to open + >>> # the file for a second time if delete=True. Therefore, we have to + >>> # manually handle closing and deleting this file to allow us to open + >>> # the file by its name across all platforms. + >>> f = NamedTemporaryFile(delete=False) + >>> _ = f.write( + ... br'''--- + ... % for i in range(int(x)): + ... a pox on ${i} for epoxy + ... % end + ... ${120 + + ... + ... 3} + ... abc + ... ${"w\nx\nX\ny"} + ... z + ... ''') + >>> f.flush() + >>> result = expand( + ... f.name, + ... line_directive='//#sourceLocation(file: "%(file)s", ' + \ + ... 'line: %(line)d)', + ... x=2 + ... ).replace( + ... '"%s"' % f.name.replace('\\', '/'), '"dummy.file"') + >>> print(result, end='') + //#sourceLocation(file: "dummy.file", line: 1) + --- + //#sourceLocation(file: "dummy.file", line: 3) + a pox on 0 for epoxy + //#sourceLocation(file: "dummy.file", line: 3) + a pox on 1 for epoxy + //#sourceLocation(file: "dummy.file", line: 5) + 123 + //#sourceLocation(file: "dummy.file", line: 8) + abc + w + x + X + y + //#sourceLocation(file: "dummy.file", line: 10) + z + >>> f.close() + >>> os.remove(f.name) + """ + with io.open(filename, encoding='utf-8') as f: + t = parse_template(filename, f.read()) + d = os.getcwd() + os.chdir(os.path.dirname(os.path.abspath(filename))) + try: + return execute_template( + t, line_directive=line_directive, **local_bindings) + finally: + os.chdir(d) + + +def parse_template(filename, text=None): + r"""Return an AST corresponding to the given template file. + + If text is supplied, it is assumed to be the contents of the file, + as a string. + + >>> print(parse_template('dummy.file', text= + ... '''% for x in [1, 2, 3]: + ... % if x == 1: + ... literal1 + ... % elif x > 1: # add output line after this line to fix bug + ... % if x == 2: + ... literal2 + ... % end + ... % end + ... % end + ... ''')) + Block: + [ + Code: + { + for x in [1, 2, 3]: + __children__[0].execute(__context__) + } + [ + Block: + [ + Code: + { + if x == 1: + __children__[0].execute(__context__) + elif x > 1: # add output line after this line to fix bug + __children__[1].execute(__context__) + } + [ + Block: + [ + Literal: + literal1 + ] + Block: + [ + Code: + { + if x == 2: + __children__[0].execute(__context__) + } + [ + Block: + [ + Literal: + literal2 + ] + ] + ] + ] + ] + ] + ] + + >>> print(parse_template( + ... 'dummy.file', + ... text='%for x in range(10):\n% print(x)\n%end\njuicebox')) + Block: + [ + Code: + { + for x in range(10): + __children__[0].execute(__context__) + } + [ + Block: + [ + Code: {print(x)} [] + ] + ] + Literal: + juicebox + ] + + >>> print(parse_template('/dummy.file', text= + ... '''Nothing + ... % if x: + ... % for i in range(3): + ... ${i} + ... % end + ... % else: + ... THIS SHOULD NOT APPEAR IN THE OUTPUT + ... ''')) + Block: + [ + Literal: + Nothing + Code: + { + if x: + __children__[0].execute(__context__) + else: + __children__[1].execute(__context__) + } + [ + Block: + [ + Code: + { + for i in range(3): + __children__[0].execute(__context__) + } + [ + Block: + [ + Code: {(i)} [] + Literal: + + ] + ] + ] + Block: + [ + Literal: + THIS SHOULD NOT APPEAR IN THE OUTPUT + ] + ] + ] + + >>> print(parse_template('dummy.file', text='''% + ... %for x in y: + ... % print(y) + ... ''')) + Block: + [ + Code: + { + for x in y: + __children__[0].execute(__context__) + } + [ + Block: + [ + Code: {print(y)} [] + ] + ] + ] + + >>> print(parse_template('dummy.file', text='''% + ... %if x: + ... % print(y) + ... AAAA + ... %else: + ... BBBB + ... ''')) + Block: + [ + Code: + { + if x: + __children__[0].execute(__context__) + else: + __children__[1].execute(__context__) + } + [ + Block: + [ + Code: {print(y)} [] + Literal: + AAAA + ] + Block: + [ + Literal: + BBBB + ] + ] + ] + + >>> print(parse_template('dummy.file', text='''% + ... %if x: + ... % print(y) + ... AAAA + ... %# This is a comment + ... %else: + ... BBBB + ... ''')) + Block: + [ + Code: + { + if x: + __children__[0].execute(__context__) + # This is a comment + else: + __children__[1].execute(__context__) + } + [ + Block: + [ + Code: {print(y)} [] + Literal: + AAAA + ] + Block: + [ + Literal: + BBBB + ] + ] + ] + + >>> print(parse_template('dummy.file', text='''\ + ... %for x in y: + ... AAAA + ... %if x: + ... BBBB + ... %end + ... CCCC + ... ''')) + Block: + [ + Code: + { + for x in y: + __children__[0].execute(__context__) + } + [ + Block: + [ + Literal: + AAAA + Code: + { + if x: + __children__[0].execute(__context__) + } + [ + Block: + [ + Literal: + BBBB + ] + ] + Literal: + CCCC + ] + ] + ] + """ + return Block(ParseContext(filename, text)) + + +def execute_template( + ast, line_directive=_default_line_directive, **local_bindings): + r"""Return the text generated by executing the given template AST. + + Keyword arguments become local variable bindings in the execution context + + >>> root_directory = os.path.abspath('/') + >>> file_name = (root_directory + 'dummy.file').replace('\\', '/') + >>> ast = parse_template(file_name, text= + ... '''Nothing + ... % if x: + ... % for i in range(3): + ... ${i} + ... % end + ... % else: + ... THIS SHOULD NOT APPEAR IN THE OUTPUT + ... ''') + >>> out = execute_template(ast, + ... line_directive='//#sourceLocation(file: "%(file)s", line: %(line)d)', + ... x=1) + >>> out = out.replace(file_name, "DUMMY-FILE") + >>> print(out, end="") + //#sourceLocation(file: "DUMMY-FILE", line: 1) + Nothing + //#sourceLocation(file: "DUMMY-FILE", line: 4) + 0 + //#sourceLocation(file: "DUMMY-FILE", line: 4) + 1 + //#sourceLocation(file: "DUMMY-FILE", line: 4) + 2 + + >>> ast = parse_template(file_name, text= + ... '''Nothing + ... % a = [] + ... % for x in range(3): + ... % a.append(x) + ... % end + ... ${a} + ... ''') + >>> out = execute_template(ast, + ... line_directive='//#sourceLocation(file: "%(file)s", line: %(line)d)', + ... x=1) + >>> out = out.replace(file_name, "DUMMY-FILE") + >>> print(out, end="") + //#sourceLocation(file: "DUMMY-FILE", line: 1) + Nothing + //#sourceLocation(file: "DUMMY-FILE", line: 6) + [0, 1, 2] + + >>> ast = parse_template(file_name, text= + ... '''Nothing + ... % a = [] + ... % for x in range(3): + ... % a.append(x) + ... % end + ... ${a} + ... ''') + >>> out = execute_template(ast, + ... line_directive='#line %(line)d "%(file)s"', x=1) + >>> out = out.replace(file_name, "DUMMY-FILE") + >>> print(out, end="") + #line 1 "DUMMY-FILE" + Nothing + #line 6 "DUMMY-FILE" + [0, 1, 2] + """ + execution_context = ExecutionContext( + line_directive=line_directive, **local_bindings) + ast.execute(execution_context) + return ''.join(execution_context.result_text) + + +def main(): + import argparse + import sys + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='Generate Your Boilerplate!', epilog=''' + A GYB template consists of the following elements: + + - Literal text which is inserted directly into the output + + - %% or $$ in literal text, which insert literal '%' and '$' + symbols respectively. + + - Substitutions of the form ${}. The Python + expression is converted to a string and the result is inserted + into the output. + + - Python code delimited by %{...}%. Typically used to inject + definitions (functions, classes, variable bindings) into the + evaluation context of the template. Common indentation is + stripped, so you can add as much indentation to the beginning + of this code as you like + + - Lines beginning with optional whitespace followed by a single + '%' and Python code. %-lines allow you to nest other + constructs inside them. To close a level of nesting, use the + "%end" construct. + + - Lines beginning with optional whitespace and followed by a + single '%' and the token "end", which close open constructs in + %-lines. + + Example template: + + - Hello - + %{ + x = 42 + def succ(a): + return a+1 + }% + + I can assure you that ${x} < ${succ(x)} + + % if int(y) > 7: + % for i in range(3): + y is greater than seven! + % end + % else: + y is less than or equal to seven + % end + + - The End. - + + When run with "gyb -Dy=9", the output is + + - Hello - + + I can assure you that 42 < 43 + + y is greater than seven! + y is greater than seven! + y is greater than seven! + + - The End. - +''' + ) + parser.add_argument( + '-D', action='append', dest='defines', metavar='NAME=VALUE', + default=[], + help='''Bindings to be set in the template's execution context''') + + parser.add_argument( + 'file', type=str, + help='Path to GYB template file (defaults to stdin)', nargs='?', + default='-') + parser.add_argument( + '-o', dest='target', type=str, + help='Output file (defaults to stdout)', default='-') + parser.add_argument( + '--test', action='store_true', + default=False, help='Run a self-test') + parser.add_argument( + '--verbose-test', action='store_true', + default=False, help='Run a verbose self-test') + parser.add_argument( + '--dump', action='store_true', + default=False, help='Dump the parsed template to stdout') + parser.add_argument( + '--line-directive', + default=_default_line_directive, + help=''' + Line directive format string, which will be + provided 2 substitutions, `%%(line)d` and `%%(file)s`. + + Example: `#sourceLocation(file: "%%(file)s", line: %%(line)d)` + + The default works automatically with the `line-directive` tool, + which see for more information. + ''') + + args = parser.parse_args(sys.argv[1:]) + + if args.test or args.verbose_test: + import doctest + selfmod = sys.modules[__name__] + if doctest.testmod(selfmod, verbose=args.verbose_test or None).failed: + sys.exit(1) + + bindings = dict(x.split('=', 1) for x in args.defines) + if args.file == '-': + ast = parse_template('stdin', sys.stdin.read()) + else: + with io.open(os.path.normpath(args.file), 'r', encoding='utf-8') as f: + ast = parse_template(args.file, f.read()) + if args.dump: + print(ast) + + # Allow the template to open files and import .py files relative to its own + # directory + saved_cwd = os.getcwd() + os.chdir(os.path.dirname(os.path.abspath(args.file))) + sys.path = ['.'] + sys.path + result_text = execute_template(ast, args.line_directive, **bindings) + + if args.target == '-': + sys.stdout.write(result_text) + else: + os.chdir(saved_cwd) + with io.open(args.target, 'w', encoding='utf-8', newline='\n') as f: + f.write(result_text) + + +if __name__ == '__main__': + main()