Improve translation system workflow #17214

Merged: 39 commits, Oct 4, 2024
Changes from 1 commit

Commits (39)
d3d2056
make md2html.py a standalone script in user_docs that can be used by …
michaelDCurran Sep 9, 2024
552cba5
markdownTranslate translateXliff: include line number on exceptions. …
michaelDCurran Sep 11, 2024
6cb9d07
Add a utility script for translators called nvdaL10nUtil which does a…
michaelDCurran Sep 11, 2024
4ba0f60
nuitka is now a requirement, for building the standalone nvdaL10nUtil…
michaelDCurran Sep 11, 2024
87a5c9c
Merge branch 'beta' into md2html
michaelDCurran Sep 11, 2024
01635f8
Merge branch 'beta' into md2html
michaelDCurran Sep 17, 2024
7bab7b1
markdownTranslate generateMarkdown: ignore bad translations containin…
michaelDCurran Sep 17, 2024
45886d8
markdownTranslate translateXliff: do not xmlEscape the pretranslated …
michaelDCurran Sep 18, 2024
ba832ab
markdownTranslate generateMarkdown: warn for corrupt or escaped lines
michaelDCurran Sep 18, 2024
536c068
markdownTranslate: It is not correct to escape / unescape text when s…
michaelDCurran Sep 22, 2024
e163f4d
nvdaL10nUtil: add stripXliff command which removes everything from an…
michaelDCurran Sep 22, 2024
496b2d0
gitignore .md.sub files now generated by scons while generating html …
michaelDCurran Sep 22, 2024
b75d77b
nvdaL10nutil: add xliff2html command which incorporates xliff2md and …
michaelDCurran Sep 24, 2024
1418cab
Update translation documentation to remove no longer relevant info an…
michaelDCurran Sep 25, 2024
66f634a
Pre-commit auto-fix
pre-commit-ci[bot] Sep 25, 2024
40dd89e
Apply suggestions from code review
michaelDCurran Sep 27, 2024
90f9385
nvdaL10nUtil xliff2html: autodetect language from the xliff file.
michaelDCurran Sep 30, 2024
410e717
Apply suggestions from code review
seanbudd Oct 2, 2024
a7acfc3
Merge branch 'beta' into md2html
michaelDCurran Oct 2, 2024
84bcf9e
crowdin.md: don't suggest -l for nvdaL10nUtil xliff2html as it now fe…
michaelDCurran Oct 2, 2024
4daa28d
sconstruct: pass outputDir variable to nuitka rather than hardcoding …
michaelDCurran Oct 2, 2024
d2c4c35
md2html: remove unneeded import
michaelDCurran Oct 2, 2024
ba63183
md2html: don't override lang with old logic to fetch language from th…
michaelDCurran Oct 2, 2024
8f82c5e
md2html: restore extraStylesheet numberedHeadings.css for userGuide a…
michaelDCurran Oct 2, 2024
e645b88
Update projectDocs/translating/crowdin.md
michaelDCurran Oct 2, 2024
7b25575
md2html: remove another unused import
michaelDCurran Oct 2, 2024
7707641
nvdaL10nUtil: remove unused variable
michaelDCurran Oct 2, 2024
88d1fa9
Linting
michaelDCurran Oct 2, 2024
7fa540e
Apply suggestions from code review
michaelDCurran Oct 2, 2024
8fe8f23
nvdaL10nUtil: add / update docstrings.
michaelDCurran Oct 2, 2024
39085a9
Update projectDocs/dev/developerGuide/sconscript
michaelDCurran Oct 3, 2024
a98cb61
sconstruct: provide a specific nvdaL10nUtil action for building nvdaL…
michaelDCurran Oct 3, 2024
f717172
appVeyor: specifically build nvdaL10nUtil
michaelDCurran Oct 3, 2024
63974e3
Mark nuitka as having an okay license (Apache, only used for build).
michaelDCurran Oct 3, 2024
121dc16
fix pre-commit linting
seanbudd Oct 3, 2024
cdd299a
apply pre-commit
seanbudd Oct 3, 2024
74f23bc
sconstruct: pass --assume-yes-for-downloads to Nuitka so that it does…
michaelDCurran Oct 4, 2024
de2f36e
sconstruct: only keep the exe when building nvdaL10nUtil with Nuitka
michaelDCurran Oct 4, 2024
d3a66ea
Check that paths are on the same drive before calling relpath
SaschaCowley Oct 4, 2024
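
Taken together, the commits above build a markdown to XLIFF to HTML localisation pipeline: md2html.py becomes a standalone converter, markdownTranslate.py gains XLIFF generation and round-tripping, and a new nvdaL10nUtil script (built into a standalone executable with Nuitka) wraps both for translators. The snippet below is only a rough sketch of that flow, calling functions that appear in the diff further down; the file paths and the "fr" language code are illustrative assumptions, and generateXliff's keyword names are inferred from the diff rather than confirmed.

import markdownTranslate
import md2html

# 1. Produce an English XLIFF file from the authored markdown (keyword names assumed).
markdownTranslate.generateXliff(mdPath="userGuide.md", outputPath="userGuide.xliff")

# 2. Once translators have filled in targets (e.g. on Crowdin), rebuild translated markdown.
markdownTranslate.generateMarkdown(
    xliffPath="userGuide.fr.xliff",
    outputPath="userGuide.fr.md",
    translated=True,
)

# 3. Render the translated markdown to HTML.
md2html.main(source="userGuide.fr.md", dest="userGuide.fr.html", lang="fr")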
apply pre-commit
seanbudd committed Oct 3, 2024
commit cdd299a837b392f955edfec4141f5674a68f244b
90 changes: 62 additions & 28 deletions user_docs/markdownTranslate.py
@@ -36,7 +36,9 @@ def prettyPathString(path: str) -> str:

@contextlib.contextmanager
def createAndDeleteTempFilePath_contextManager(
dir: str | None = None, prefix: str | None = None, suffix: str | None = None
dir: str | None = None,
prefix: str | None = None,
suffix: str | None = None,
) -> Generator[str, None, None]:
"""A context manager that creates a temporary file and deletes it when the context is exited"""
with tempfile.NamedTemporaryFile(dir=dir, prefix=prefix, suffix=suffix, delete=False) as tempFile:
@@ -63,7 +65,10 @@ def getLastCommitID(filePath: str) -> str:
def getGitDir() -> str:
# Run the git rev-parse command to get the root of the git directory
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"], capture_output=True, text=True, check=True
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
text=True,
check=True,
)
gitDir = result.stdout.strip()
if not os.path.isdir(gitDir):
@@ -129,7 +134,7 @@ def generateSkeleton(mdPath: str, outputPath: str) -> Result_generateSkeleton:
skelLine = mdLine
outputFile.write(skelLine)
print(
f"Generated skeleton file with {res.numTotalLines} total lines and {res.numTranslationPlaceholders} translation placeholders"
f"Generated skeleton file with {res.numTotalLines} total lines and {res.numTranslationPlaceholders} translation placeholders",
)
return res

@@ -162,10 +167,13 @@ def extractSkeleton(xliffPath: str, outputPath: str):


def updateSkeleton(
origMdPath: str, newMdPath: str, origSkelPath: str, outputPath: str
origMdPath: str,
newMdPath: str,
origSkelPath: str,
outputPath: str,
) -> Result_updateSkeleton:
print(
f"Creating updated skeleton file {prettyPathString(outputPath)} from {prettyPathString(origSkelPath)} with changes from {prettyPathString(origMdPath)} to {prettyPathString(newMdPath)}..."
f"Creating updated skeleton file {prettyPathString(outputPath)} from {prettyPathString(origSkelPath)} with changes from {prettyPathString(origMdPath)} to {prettyPathString(newMdPath)}...",
)
res = Result_updateSkeleton()
with contextlib.ExitStack() as stack:
@@ -203,7 +211,7 @@ def updateSkeleton(
f"Updated skeleton file with {res.numAddedLines} added lines "
f"({res.numAddedTranslationPlaceholders} translation placeholders), "
f"{res.numRemovedLines} removed lines ({res.numRemovedTranslationPlaceholders} translation placeholders), "
f"and {res.numUnchangedLines} unchanged lines ({res.numUnchangedTranslationPlaceholders} translation placeholders)"
f"and {res.numUnchangedLines} unchanged lines ({res.numUnchangedTranslationPlaceholders} translation placeholders)",
)
return res

@@ -226,14 +234,14 @@ def generateXliff(
dir=os.path.dirname(outputPath),
prefix=os.path.basename(mdPath),
suffix=".skel",
)
),
)
generateSkeleton(mdPath=mdPath, outputPath=skelPath)
with open(skelPath, "r", encoding="utf8") as skelFile:
skelContent = skelFile.read()
res = Result_generateXliff()
print(
f"Generating xliff file {prettyPathString(outputPath)} from {prettyPathString(mdPath)} and {prettyPathString(skelPath)}..."
f"Generating xliff file {prettyPathString(outputPath)} from {prettyPathString(mdPath)} and {prettyPathString(skelPath)}...",
)
with contextlib.ExitStack() as stack:
mdFile = stack.enter_context(open(mdPath, "r", encoding="utf8"))
@@ -244,12 +252,13 @@
outputFile.write(
'<?xml version="1.0"?>\n'
f'<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en">\n'
f'<file id="{fileID}" original="{mdUri}">\n'
f'<file id="{fileID}" original="{mdUri}">\n',
)
outputFile.write(f"<skeleton>\n{xmlEscape(skelContent)}\n</skeleton>\n")
res.numTranslatableStrings = 0
for lineNo, (mdLine, skelLine) in enumerate(
zip_longest(mdFile.readlines(), skelContent.splitlines(keepends=True)), start=1
zip_longest(mdFile.readlines(), skelContent.splitlines(keepends=True)),
start=1,
):
mdLine = mdLine.rstrip()
skelLine = skelLine.rstrip()
@@ -262,7 +271,7 @@
raise ValueError(f'Line {lineNo}: does not end with "{suffix}", {mdLine=}, {skelLine=}')
source = mdLine[len(prefix) : len(mdLine) - len(suffix)]
outputFile.write(
f'<unit id="{ID}">\n' "<notes>\n" f'<note appliesTo="source">line: {lineNo + 1}</note>\n'
f'<unit id="{ID}">\n' "<notes>\n" f'<note appliesTo="source">line: {lineNo + 1}</note>\n',
)
if prefix:
outputFile.write(f'<note appliesTo="source">prefix: {xmlEscape(prefix)}</note>\n')
@@ -273,7 +282,7 @@
f"<segment>\n"
f"<source>{xmlEscape(source)}</source>\n"
"</segment>\n"
"</unit>\n"
"</unit>\n",
)
else:
if mdLine != skelLine:
@@ -296,19 +305,19 @@ def updateXliff(
# uses generateMarkdown, extractSkeleton, updateSkeleton, and generateXliff to generate an updated xliff file.
outputDir = os.path.dirname(outputPath)
print(
f"Generating updated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} and {prettyPathString(mdPath)}..."
f"Generating updated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} and {prettyPathString(mdPath)}...",
)
with contextlib.ExitStack() as stack:
origMdPath = stack.enter_context(
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="generated_", suffix=".md")
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="generated_", suffix=".md"),
)
generateMarkdown(xliffPath=xliffPath, outputPath=origMdPath, translated=False)
origSkelPath = stack.enter_context(
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="extracted_", suffix=".skel")
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="extracted_", suffix=".skel"),
)
extractSkeleton(xliffPath=xliffPath, outputPath=origSkelPath)
updatedSkelPath = stack.enter_context(
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="updated_", suffix=".skel")
createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="updated_", suffix=".skel"),
)
updateSkeleton(
origMdPath=origMdPath,
@@ -332,7 +341,7 @@ def translateXliff(
allowBadAnchors: bool = False,
) -> Result_translateXliff:
print(
f"Creating {lang} translated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} using {prettyPathString(pretranslatedMdPath)}..."
f"Creating {lang} translated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} using {prettyPathString(pretranslatedMdPath)}...",
)
res = Result_translateXliff()
with contextlib.ExitStack() as stack:
@@ -357,15 +366,15 @@
prefix, ID, suffix = m.groups()
if prefix and not pretranslatedLine.startswith(prefix):
raise ValueError(
f'Line {lineNo} of translation does not start with "{prefix}", {pretranslatedLine=}, {skelLine=}'
f'Line {lineNo} of translation does not start with "{prefix}", {pretranslatedLine=}, {skelLine=}',
)
if suffix and not pretranslatedLine.endswith(suffix):
if allowBadAnchors and (m := re_heading.match(pretranslatedLine)):
print(f"Warning: ignoring bad anchor in line {lineNo}: {pretranslatedLine}")
suffix = m.group(3)
if suffix and not pretranslatedLine.endswith(suffix):
raise ValueError(
f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}'
f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}',
)
translation = pretranslatedLine[len(prefix) : len(pretranslatedLine) - len(suffix)]
try:
@@ -387,7 +396,7 @@
raise
elif skelLine != pretranslatedLine:
raise ValueError(
f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}"
f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}",
)
xliff.write(outputPath, encoding="utf8", xml_declaration=True)
print(f"Translated xliff file with {res.numTranslatedStrings} translated strings")
@@ -460,7 +469,7 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) -
else:
outputFile.write(line)
print(
f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings. Ignoring {res.numBadTranslationStrings} bad translated strings"
f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings. Ignoring {res.numBadTranslationStrings} bad translated strings",
)
return res

@@ -480,7 +489,7 @@ def ensureMarkdownFilesMatch(path1: str, path2: str, allowBadAnchors: bool = Fal
and line1.count("|") == line2.count("|")
):
print(
f"Warning: ignoring cell padding of post table header line at line {lineNo}: {line1}, {line2}"
f"Warning: ignoring cell padding of post table header line at line {lineNo}: {line1}, {line2}",
)
continue
if (
Expand All @@ -489,7 +498,7 @@ def ensureMarkdownFilesMatch(path1: str, path2: str, allowBadAnchors: bool = Fal
and line1.count("|") == line2.count("|")
):
print(
f"Warning: ignoring cell padding of hidden header row at line {lineNo}: {line1}, {line2}"
f"Warning: ignoring cell padding of hidden header row at line {lineNo}: {line1}, {line2}",
)
continue
if allowBadAnchors and (m1 := re_heading.match(line1)) and (m2 := re_heading.match(line2)):
@@ -577,14 +586,29 @@ def pretranslateAllPossibleLanguages(langsDir: str, mdBaseName: str):
help="The markdown file to generate the xliff file for",
)
generateXliffParser.add_argument(
"-o", "--output", dest="output", type=str, required=True, help="The file to output the xliff file to"
"-o",
"--output",
dest="output",
type=str,
required=True,
help="The file to output the xliff file to",
)
updateXliffParser = commandParser.add_parser("updateXliff")
updateXliffParser.add_argument(
"-x", "--xliff", dest="xliff", type=str, required=True, help="The original xliff file"
"-x",
"--xliff",
dest="xliff",
type=str,
required=True,
help="The original xliff file",
)
updateXliffParser.add_argument(
"-m", "--newMarkdown", dest="md", type=str, required=True, help="The new markdown file"
"-m",
"--newMarkdown",
dest="md",
type=str,
required=True,
help="The new markdown file",
)
updateXliffParser.add_argument(
"-o",
@@ -596,10 +620,20 @@ def pretranslateAllPossibleLanguages(langsDir: str, mdBaseName: str):
)
translateXliffParser = commandParser.add_parser("translateXliff")
translateXliffParser.add_argument(
"-x", "--xliff", dest="xliff", type=str, required=True, help="The xliff file to translate"
"-x",
"--xliff",
dest="xliff",
type=str,
required=True,
help="The xliff file to translate",
)
translateXliffParser.add_argument(
"-l", "--lang", dest="lang", type=str, required=True, help="The language to translate to"
"-l",
"--lang",
dest="lang",
type=str,
required=True,
help="The language to translate to",
)
translateXliffParser.add_argument(
"-p",
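
The reformatted argument parsers above belong to markdownTranslate.py's updateXliff and translateXliff subcommands. As a hedged illustration of calling the underlying functions directly, the sketch below uses keyword names inferred from the print statements and signatures visible in this diff; the paths and language code are placeholders.

import markdownTranslate

# Refresh an existing English XLIFF after the source markdown has changed (keyword names assumed).
markdownTranslate.updateXliff(
    xliffPath="userGuide.xliff",
    mdPath="userGuide.new.md",
    outputPath="userGuide.updated.xliff",
)

# Merge an already translated markdown file back into a language-specific XLIFF.
markdownTranslate.translateXliff(
    xliffPath="userGuide.updated.xliff",
    lang="fr",
    pretranslatedMdPath="userGuide.fr.md",
    outputPath="userGuide.fr.xliff",
    allowBadAnchors=True,  # tolerate mismatched heading anchors, per the re_heading handling above
)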
4 changes: 2 additions & 2 deletions user_docs/md2html.py
@@ -21,7 +21,7 @@
"markdown_link_attr_modifier",
# Adds links to GitHub authors, issues and PRs
"mdx_gh_links",
}
},
)

EXTENSIONS_CONFIG = {
@@ -160,7 +160,7 @@ def main(source: str, dest: str, lang: str = "en", docType: str | None = None):
dir="rtl" if lang in RTL_LANG_CODES else "ltr",
title=title,
extraStylesheet=extraStylesheet,
)
),
)

htmlOutput = _generateSanitizedHTML(mdStr, isKeyCommands)
13 changes: 9 additions & 4 deletions user_docs/nvdaL10nUtil.py
@@ -104,7 +104,8 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None = None)
elif oldXliffRoot is not None:
unitId = unit.get("id")
oldTarget = oldXliffRoot.find(
f'./xliff:file/xliff:unit[@id="{unitId}"]/xliff:segment/xliff:target', namespaces=namespace
f'./xliff:file/xliff:unit[@id="{unitId}"]/xliff:segment/xliff:target',
namespaces=namespace,
)
if oldTarget is not None and oldTarget.text == targetText:
existingTranslationCount += 1
@@ -114,7 +115,7 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None = None)
segmentCount - emptyTargetCount - corruptTargetcount - sourceTargetcount - existingTranslationCount
)
print(
f"Processed {segmentCount} segments, removing {emptyTargetCount} empty targets, {corruptTargetcount} corrupt targets, {sourceTargetcount} source targets, and {existingTranslationCount} existing translations, resulting in {keptTranslations} translations kept"
f"Processed {segmentCount} segments, removing {emptyTargetCount} empty targets, {corruptTargetcount} corrupt targets, {sourceTargetcount} source targets, and {existingTranslationCount} existing translations, resulting in {keptTranslations} translations kept",
)


@@ -177,7 +178,9 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None = None)
match args.command:
case "xliff2md":
markdownTranslate.generateMarkdown(
xliffPath=args.xliffPath, outputPath=args.mdPath, translated=not args.untranslated
xliffPath=args.xliffPath,
outputPath=args.mdPath,
translated=not args.untranslated,
)
case "md2html":
md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType)
Expand All @@ -187,7 +190,9 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None = None)
temp_mdFile.close()
try:
markdownTranslate.generateMarkdown(
xliffPath=args.xliffPath, outputPath=temp_mdFile.name, translated=not args.untranslated
xliffPath=args.xliffPath,
outputPath=temp_mdFile.name,
translated=not args.untranslated,
)
md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=lang, docType=args.docType)
finally:
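
For completeness, here is a minimal standalone equivalent of the xliff2html branch shown above, without nvdaL10nUtil's argument parsing. The file names and the docType value are assumptions, and the language autodetection from the XLIFF file (added in commit 90f9385) is skipped in favour of an explicit lang.

import os
import tempfile

import markdownTranslate
import md2html

xliffPath = "userGuide.fr.xliff"  # assumed input
htmlPath = "userGuide.fr.html"    # assumed output

# Generate translated markdown into a temporary file, then convert it to HTML,
# mirroring the xliff2html case in the match statement above.
fd, tempMdPath = tempfile.mkstemp(suffix=".md")
os.close(fd)
try:
    markdownTranslate.generateMarkdown(xliffPath=xliffPath, outputPath=tempMdPath, translated=True)
    md2html.main(source=tempMdPath, dest=htmlPath, lang="fr", docType="userGuide")
finally:
    os.remove(tempMdPath)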