This repository has been archived by the owner on Oct 24, 2020. It is now read-only.
forked from EFForg/https-everywhere
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge-rulesets.py
105 lines (87 loc) · 3.47 KB
/
merge-rulesets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python2.7
# Merge all the .xml rulesets into a single "default.rulesets" file -- this
# prevents inodes from wasting disk space, but more importantly, works around
# the fact that zip does not perform well on a pile of small files.
# currently a very literal translation of merge-rulesets.sh, but about five
# times faster
from __future__ import print_function
pass
import argparse
import errno
import os
import re
import sys
import traceback
import unicodedata
from glob import glob
from subprocess import call
# Command-line interface: the directory holding the per-site .xml rulesets,
# and a flag asking for the "fast" merge mode.
parser = argparse.ArgumentParser(description='Merge rulesets.')
parser.add_argument(
    '--source_dir',
    help='source directory',
    default='src/chrome/content/rules',
)
parser.add_argument(
    '--fast',
    action='store_true',
    help='fast merge',
)
args = parser.parse_args()
def normalize(f):
    """
    Return the filename *f* in Unicode Normalization Form C (NFC).

    OSX and Linux filesystems encode composite characters differently in
    filenames.  We should normalize to NFC: http://unicode.org/reports/tr15/.

    A byte string is decoded as UTF-8, normalized and re-encoded as UTF-8, so
    byte-string callers get bytes back.  A text string is normalized and
    returned as text, which lets the function work with the text paths that
    glob() yields on Python 3 as well as the byte strings it yields on
    Python 2.
    """
    if isinstance(f, bytes):
        return unicodedata.normalize('NFC', f.decode('utf-8')).encode('utf-8')
    return unicodedata.normalize('NFC', f)
# Path of the merged library this script (re)creates.
rulesets_fn = args.source_dir + "/default.rulesets"

# Build a real list: the collection is walked more than once below (mtime scan
# for --fast, then the sorted merge loop), and on Python 3 map() would return
# a one-shot iterator that is empty on the second pass.
xml_ruleset_files = [normalize(f) for f in glob(args.source_dir + "/*.xml")]

# cleanup after bugs :/
misfile = rulesets_fn + "r"
if os.path.exists(misfile):
    print("Cleaning up malformed rulesets file...")
    os.unlink(misfile)

# --fast: skip the rebuild when the existing library is at least as new as
# every ruleset.  If there is no library yet, or no rulesets to compare
# against, there is nothing to short-circuit, so fall through to a rebuild
# instead of failing in getmtime()/max().
if args.fast and os.path.isfile(rulesets_fn) and xml_ruleset_files:
    library_compiled_time = os.path.getmtime(rulesets_fn)
    newest_xml = max(os.path.getmtime(f) for f in xml_ruleset_files)
    if library_compiled_time >= newest_xml:
        print("Library is newer that all rulesets, skipping rebuild...")
        sys.exit(0)

print("Creating ruleset library...")

# Under git bash, sed -i issues errors and sets the file "read only".  Thanks.
if os.path.isfile(rulesets_fn):
    # In-process equivalent of `chmod u+w` (0o200 is the owner-write bit).
    # Avoids building a shell command from the user-supplied --source_dir,
    # which broke on paths containing spaces or shell metacharacters.
    try:
        os.chmod(rulesets_fn, os.stat(rulesets_fn).st_mode | 0o200)
    except OSError as e:
        # Best effort, as before: a failure here is reported, not fatal.
        print("WARNING: could not make", rulesets_fn, "writable:", e)
def rulesize():
    """Return the length of the contents of the merged rulesets file.

    The file named by the module-level ``rulesets_fn`` is opened in the
    default (text) mode and read in full; the handle is closed before
    returning.
    """
    with open(rulesets_fn) as library_file:
        return len(library_file.read())
def clean_up(rulefile):
    """Return the ruleset text with comments, tests and spare whitespace removed.

    *rulefile* is the text of one ruleset.  The substitutions below are
    applied in order; later steps rely on earlier ones (for example, newlines
    are already gone by the time ``<test .../>`` elements are deleted).
    """
    substitutions = (
        # Drop XML comments and every CR / LF character.
        (re.compile(r"<!--.*?-->|\n|\r", flags=re.DOTALL), ''),
        # Exactly one space in front of each from= attribute.
        (re.compile(r'\s*(from=)'), r' \1'),
        # Exactly one space between a closing quote and a to= attribute.
        (re.compile(r'"\s*(to=)'), r'" \1'),
        # No whitespace between adjacent tags.
        (re.compile(r">\s*<"), r"><"),
        # Each ruleset ends with a single newline.
        (re.compile(r"</ruleset>\s*"), r"</ruleset>\n"),
        # No whitespace before a self-closing "/>" or before "<ruleset".
        (re.compile(r"\s*(/>|<ruleset)"), r"\1"),
        # Delete <test .../> elements.
        (re.compile(r"<test.+?/>"), r""),
    )
    cleaned = rulefile
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned
# GIT_COMMIT_ID is optional: when it is not set (the Chromium case) the
# library root element is written without a gitcommitid attribute.  Looking
# the variable up explicitly, instead of wrapping the write in a bare
# except, keeps genuine write errors from being mistaken for "not set".
commit_id = os.environ.get("GIT_COMMIT_ID")

# The with-blocks guarantee the library and every ruleset file are closed,
# even if reading or cleaning one of the rulesets raises.
with open(rulesets_fn, "w") as library:
    if commit_id is not None:
        library.write('<rulesetlibrary gitcommitid="%s">' % commit_id)
    else:
        # Chromium
        library.write('<rulesetlibrary>')

    # Include the filename.xml as the "f" attribute
    print("Removing whitespaces and comments...")
    for rfile in sorted(xml_ruleset_files):
        with open(rfile) as ruleset_file:
            ruleset = ruleset_file.read()
        fn = os.path.basename(rfile)
        # Only the first "<ruleset" (the root element) gets the attribute.
        ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
        library.write(clean_up(ruleset))
    library.write("</rulesetlibrary>\n")

# Validate the merged file with xmllint when it is installed.
try:
    xmllint_status = call(["xmllint", "--noout", rulesets_fn])
except OSError as e:
    # ENOENT means the xmllint executable was not found; any other OSError
    # is unexpected and is re-raised.  Comparing errno rather than the
    # traceback text keeps this working whatever language or platform the
    # error message is rendered in.
    if e.errno != errno.ENOENT:
        raise
    print("WARNING: xmllint not present; validation of", rulesets_fn, " skipped.")
else:
    if xmllint_status == 0:
        print(rulesets_fn, "passed XML validity test.")
    else:
        print("ERROR:", rulesets_fn, "failed XML validity test!")
        sys.exit(1)

# We make default.rulesets at build time, but it shouldn't have a variable
# timestamp
call(["touch", "-r", "src/install.rdf", rulesets_fn])