-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean-spell
executable file
·104 lines (88 loc) · 3.31 KB
/
clean-spell
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env raku
# Remove words in xt/pws/*.pws that are no longer needed
# * Bug fixes in the spell checker have removed the need
# to check certain words.
# * Edits to the docs themselves no longer use some words.
#
# set UTIL_CLEAN_SPELL_REGEX environment variable to
# only check words that match
# that regex.
#
# Set the UTIL_CLEAN_SPELL_LAST environment variable to
# the last word that was already processed. The
# script will continue with the words that sort after it.
#
# This test is slow; these variables give
# us an easy way to chunk the test runs.
#
# Try to be clever and only test files that match the word,
# even if it's a partial match to speed up the testing.
#
# Trust but verify: make sure you rerun the entire spell check
# after letting this program update the .pws files
use File::Temp;
use lib $*PROGRAM.parent(2).child('lib');
use Test-Files;
use Pod::Cache;
# Optional filters read from the environment: a regex that candidate words
# must match, and a word that candidates must sort after.
my $regex = %*ENV<UTIL_CLEAN_SPELL_REGEX> // ".";
my $last  = %*ENV<UTIL_CLEAN_SPELL_LAST>  // "";

# Number of files handed to a single xt/aspell.t invocation.
my $at-a-time = 4;

# Check the same files as xt/aspell.t does by default: every document known
# to Test-Files except names containing README.??.md (two arbitrary
# characters between the dots) ...
my @files = Test-Files.documents.grep({ $_ !~~ / 'README.' .. '.md' / });

# ... but swap each .rakudoc source for the file Pod::Cache returns for it,
# so the quick search runs against pre-generated/rendered Pod6 output.
note "Caching rakudoc files...";
@files = @files.map(-> $file {
    $file.ends-with('.rakudoc') ?? Pod::Cache.cache-file($file) !! $file
});
# Walk each personal word list and try to drop its words one at a time:
# the word is removed from the list, xt/aspell.t is rerun on the files that
# mention it, and the removal is undone if that test fails.
for <xt/pws/words.pws xt/pws/code.pws> -> $dict {
    # Read the whole list before touching it. erase-word replaces $dict on
    # disk for every candidate, so iterating a lazily-read handle would
    # depend on how the file happens to be replaced.
    my @words = $dict.IO.lines;

    for @words -> $word {
        # Honour the chunking filters, and never touch the aspell header line.
        next unless $word gt $last;
        next unless $word ~~ /<$regex>/;
        next if $word.starts-with('personal_ws-1.1 en');

        note "Testing $dict / $word ";

        # Quick pre-filter: which files mention the word at all (ignoring
        # case, partial matches included)?
        #   -F  match the word literally instead of as a regular expression
        #   --  end of options, so a word starting with '-' stays a pattern
        # :close releases the pipe; grep's exit status is not inspected.
        my $grep   = run('grep', '-liF', '--', $word, |@files, :out);
        my $output = $grep.out.slurp(:close);

        # remove word, keep pointer to backup lexicon
        my $backup = erase-word($dict, $word);

        if $output eq '' {
            note "\tnot found, removing.";
        } else {
            my @min-files = $output.lines;
            note "\tfound in {+@min-files} files, testing.";
            my $all-ok = True;

            # use rotor, but get the partial chunk first
            # so we can fail slightly faster
            for @min-files.reverse.rotor($at-a-time, :partial).reverse -> @test-files {
                note "\t\t" ~ @test-files.join("\n\t\t");

                my $aspell = run('xt/aspell.t', |@test-files.reverse, :out, :err);
                # Only the exit status matters; drain and close both pipes
                # so their handles are not left open for every chunk tested.
                $aspell.out.slurp(:close);
                $aspell.err.slurp(:close);

                if $aspell.exitcode != 0 {
                    $all-ok = False;
                    note "\taspell test failed, keeping word";
                    # Put the lexicon back the way it was before erase-word.
                    run('mv', $backup, $dict);
                    last; # no need to test other files
                }
            }

            if $all-ok {
                note "\taspell test passed, removing word";
                # We removed the word to do the test, so just leave as is.
            }
        }
    }
}
# Remove $word from the lexicon file $dict.
#
# The current $dict is first copied to a temp file as a backup. A second temp
# file receives every line of $dict that is not equal to $word, and that
# file is then moved over $dict with `mv`.
#
# Returns the file name of the backup copy, so the caller can move it back
# over $dict if the word turns out to be needed after all.
sub erase-word($dict, $word) {
    # Reserve a temp file for the backup and snapshot the lexicon as it is now.
    my ($saved-name, $saved-handle) = tempfile;
    $saved-handle.close;
    run('cp', $dict, $saved-name);

    # Write all lines except the word being erased to a second temp file.
    my ($pruned-name, $pruned-handle) = tempfile;
    $pruned-handle.say($_) for $dict.IO.lines.grep(* ne $word);
    $pruned-handle.close;

    # Swap the pruned copy into place as the live lexicon.
    run('mv', $pruned-name, $dict);

    return $saved-name;
}