scripts/isolate_tests.py

#!/usr/bin/env python3
#
# This script reads C++, RST or Solidity source files and writes all
# multi-line strings (or documentation code blocks) into individual files.
# This can be used to extract the Solidity test cases
# into files, e.g. for fuzz testing:
# scripts/isolate_tests.py test/libsolidity/*

import re
import os
import hashlib
from os.path import join, isfile, basename
from argparse import ArgumentParser
from textwrap import indent, dedent

def extract_test_cases(path):
    """Return the bodies of all multi-line C++ raw string literals in a file.

    A literal starts on a line that (after stripping whitespace) ends with
    `R"<delimiter>(` and stops at the first following line that ends with
    `)<delimiter>";`. Neither of these two lines is part of the result.
    A literal that is still open at EOF is returned with whatever content
    was gathered so far.

    path: file to read (decoded as UTF-8, undecodable bytes are dropped).
    Returns a list of strings, one per literal, every line terminated by '\\n'.
    """
    with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as file:
        lines = file.read().splitlines()

    opener = re.compile(r'R"([^(]*)\($')
    collected = []
    # Lines of the literal that is currently open; None while outside of one.
    body = None
    terminator = ''

    for raw in lines:
        stripped = raw.strip()
        if body is None:
            found = opener.search(stripped)
            if found is not None:
                terminator = ')' + found.group(1) + '";'
                body = []
                collected.append(body)
            continue

        if stripped.endswith(terminator):
            body = None
        else:
            # Keep the original (unstripped) line.
            body.append(raw + '\n')

    return [''.join(chunk) for chunk in collected]

def extract_solidity_docs_cases(path):
    """Return the Solidity snippets of an RST file that look compilable.

    Snippets are taken from blocks introduced by `.. code-block:: solidity`
    or by a line starting with `::`. A snippet is kept only if one of its
    lines starts with four whitespace characters followed by an SPDX license
    comment, a pragma, or a contract/library/interface definition.
    Leading newlines are removed from the snippets that are kept.
    """
    codeStart = "(// SPDX-License-Identifier:|pragma solidity|contract.*{|library.*{|interface.*{)"
    looksCompilable = re.compile(r'^\s{4}' + codeStart, re.MULTILINE)

    compilable = []
    for snippet in extract_docs_cases(path, [".. code-block:: solidity", '::']):
        # Filter out tests that are not supposed to be compilable.
        if looksCompilable.search(snippet):
            compilable.append(snippet.lstrip("\n"))

    return compilable

def extract_yul_docs_cases(path):
    """Return the Yul snippets of an RST file, each one usable as a standalone source.

    Snippets are taken from blocks introduced by `.. code-block:: yul`.
    Snippets containing only whitespace are dropped. A snippet whose first
    meaningful line does not start with `object` or `{` is a bare statement
    list, so it is wrapped in a block (`{ ... }`) and indented by four spaces.
    """
    tests = extract_docs_cases(path, [".. code-block:: yul"])

    def wrap_in_object(code):
        for line in code.splitlines():
            line = line.strip()
            # Blank lines and `//` comments say nothing about the shape of the
            # snippet; keep looking for the first line of actual code. Without
            # skipping blank lines, a comment header separated from an object
            # by an empty line would get wrapped in a second pair of braces.
            if line == "" or line.startswith("//"):
                continue
            if line.startswith(("object", "{")):
                return code
            return indent(f"{{\n{code.rstrip()}\n}}\n\n", "    ")

        # Only comments and blank lines: nothing to wrap.
        return code

    return [
        wrap_in_object(test)
        for test in tests
        if test.strip() != ""
    ]

# Extract code examples that follow any of the 'beginMarkers'. A block runs
# up until we reach EOF or a line that is not empty and doesn't start with
# a space.
def extract_docs_cases(path, beginMarkers):
    """Collect the indented blocks that follow a marker line in a file.

    path: file to read (decoded as UTF-8, undecodable bytes are dropped).
    beginMarkers: prefixes that open a block; they are compared against the
        lower-cased line, so they have to be given in lower case.

    Returns a list with one string per marker found (possibly empty), in file
    order. The marker line, and the directive options / empty lines directly
    after it, are not part of the block.
    """
    with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as f:
        lines = f.read().splitlines()

    markers = tuple(beginMarkers)
    snippets = []
    collecting = False
    # True from the marker line until the first line of actual content.
    awaitingContent = False

    for line in lines:
        if collecting:
            blank = line == ''
            # Skip Sphinx instructions and empty lines between them
            if awaitingContent and (blank or line.lstrip().startswith(":")):
                continue

            if blank or line.startswith(" "):
                snippets[-1] += line + "\n"
                awaitingContent = False
                continue

            # An unindented line ends the block; it may itself be a marker.
            collecting = False

        if line.lower().startswith(markers):
            collecting = True
            awaitingContent = True
            snippets.append('')

    return snippets

def write_cases(f, solidityTests, yulTests):
    """Write every test case into its own file in the current directory.

    f: name of the file the cases were extracted from; it becomes part of
        the output file names ('.', '-' and ' ' replaced by '_', lower-cased).
    solidityTests: sources written with the extension `.sol`.
    yulTests: sources written with the extension `.yul`.

    Files are named `test_<sha256 of the case>_<cleaned f>.<extension>`.
    """
    cleaned_filename = f.translate(str.maketrans(".- ", "___")).lower()

    tagged_cases = []
    tagged_cases.extend(("sol", case) for case in solidityTests)
    tagged_cases.extend(("yul", case) for case in yulTests)

    for extension, case in tagged_cases:
        # The hash is taken over the case exactly as it was extracted ...
        digest = hashlib.sha256(case.encode("utf-8")).hexdigest()
        target = f'test_{digest}_{cleaned_filename}.{extension}'
        # ... while the file content has the common leading indentation
        # removed, because code extracted from the docs is still indented.
        with open(target, mode='w', encoding='utf8', newline='') as out:
            out.write(dedent(case))

def extract_and_write(path, language):
    """Extract the test cases from one file and write them out via write_cases.

    path: file to process. The extraction method depends on the extension:
        `.rst` (any letter case) is scanned for documentation code blocks,
        `.sol` (lower case only) is taken as one Solidity case as a whole,
        and everything else is scanned for C++ raw strings.
    language: "solidity", "yul" or "" (both). The filter only applies to
        `.rst` and `.sol` files; raw strings found in other files are always
        extracted and treated as Solidity.
    """
    assert language in ["solidity", "yul", ""]
    wantSolidity = language in ("solidity", "")
    wantYul = language in ("yul", "")

    solidityCases = []
    yulCases = []

    if path.lower().endswith('.rst'):
        if wantSolidity:
            solidityCases = extract_solidity_docs_cases(path)
        if wantYul:
            yulCases = extract_yul_docs_cases(path)
    elif path.endswith('.sol'):
        if wantSolidity:
            with open(path, mode='r', encoding='utf8', newline='') as source:
                solidityCases = [source.read()]
    else:
        solidityCases = extract_test_cases(path)

    write_cases(basename(path), solidityCases, yulCases)

if __name__ == '__main__':
    # Command line: a file or directory to scan and an optional language filter.
    script_description = (
        "Reads Solidity, C++ or RST source files and extracts compilable solidity and yul code blocks from them. "
        "Can be used to generate test cases to validate code examples. "
    )

    parser = ArgumentParser(description=script_description)
    parser.add_argument(dest='path', help='Path to file or directory to look for code in.')
    parser.add_argument(
        '-l', '--language',
        dest='language',
        choices=["yul", "solidity"],
        default="",
        action='store',
        help="Extract only code blocks in the given language"
    )
    options = parser.parse_args()
    path = options.path

    if isfile(path):
        extract_and_write(path, options.language)
    else:
        for root, subdirs, files in os.walk(path):
            # Removing entries from `subdirs` in place keeps os.walk from
            # descending into these directories.
            for skipped in ('_build', 'compilationTests'):
                if skipped in subdirs:
                    subdirs.remove(skipped)

            for f in files:
                # ignore the test with broken utf-8 encoding
                if basename(f) != "invalid_utf8_sequence.sol":
                    extract_and_write(join(root, f), options.language)