forked from coreos/coreos-assembler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcmd-compress
executable file
·297 lines (257 loc) · 11.3 KB
/
cmd-compress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
#!/usr/bin/env python3
# NOTE: PYTHONUNBUFFERED is set in the entrypoint for unbuffered output
#
# Compresses all images in a build.
import os
import sys
import json
import shutil
import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from cosalib.builds import Builds
from cosalib.meta import GenericBuildMeta
from cosalib.cmdlib import (
import_ostree_commit,
ncpu,
rm_allow_noent,
runcmd,
sha256sum_file,
write_json)
# Compression format used when neither the CLI nor image.json picks one.
DEFAULT_COMPRESSOR = 'gzip'

# Multi-call dispatch: when invoked under the name "cmd-uncompress" or
# "cmd-decompress" (e.g. via a symlink), default to uncompress mode.
cmd = sys.argv[0].rsplit("/", 1)[1]
if cmd == "cmd-uncompress" or cmd == "cmd-decompress":
    DEFAULT_MODE = 'uncompress'
else:
    DEFAULT_MODE = 'compress'

parser = argparse.ArgumentParser()
parser.add_argument("--build", help="Build ID")
parser.add_argument("--artifact", default=[], action='append',
                    help="Only compress given image ARTIFACT", metavar="ARTIFACT")
parser.add_argument("--compressor", choices=['xz', 'gzip', 'zstd'],
                    help="Override compressor to use")
parser.add_argument("--mode",
                    choices=['compress', 'uncompress'],
                    default=DEFAULT_MODE,
                    help=f", default is {DEFAULT_MODE}")
parser.add_argument("--fast", action='store_true',
                    help="Override compression to `gzip -1`")
args = parser.parse_args()

builds = Builds()
# default to latest build if not specified
if args.build:
    build = args.build
else:
    build = builds.get_latest()
print(f"Targeting build: {build}")

# common extensions for known compressors; used to pick the output
# extension when compressing and to recognize compressed artifacts
# when uncompressing
ext_dict = {'xz': '.xz', 'gzip': '.gz', 'zstd': '.zst', 'zip': '.zip'}
def get_cpu_param(param):
    """Return the integer value of a CPU cgroup parameter.

    NOTE(review): reads the legacy cgroup-v1 hierarchy at
    /sys/fs/cgroup/cpu -- confirm availability on cgroup-v2-only hosts.
    """
    path = f'/sys/fs/cgroup/cpu/cpu.{param}'
    with open(path) as src:
        raw = src.read()
    return int(raw.strip())
def strip_ext(path):
    """Return *path* with everything after its last '.' removed.

    A path containing no '.' is returned unchanged; a path whose only
    '.' is the first character yields the empty string.
    """
    dot = path.rfind(".")
    return path if dot < 0 else path[:dot]
def compress_one_builddir(builddir, platform_compressors):
    """Compress every uncompressed image artifact in one per-arch build dir.

    Mutates meta.json in place: for each image compressed, the original
    sha256/size move to uncompressed-sha256/uncompressed-size and
    path/sha256/size are updated to describe the compressed artifact.
    Idempotent -- images that already carry uncompressed-sha256 are
    skipped, so the command can resume after an interruption.

    :param builddir: path to the per-arch build directory
    :param platform_compressors: dict mapping image format -> compressor
        name; a per-format entry overrides the global --compressor choice
    :returns: True if at least one artifact was compressed
    """
    print(f"Compressing: {builddir}")
    buildmeta_path = os.path.join(builddir, 'meta.json')
    # In the case where we are doing builds for different architectures
    # it's likely not all builds for this arch are local. If the meta.json
    # doesn't exist then return early.
    if not os.path.exists(buildmeta_path):
        print(f"No meta.json exists for {builddir}.. Skipping")
        return False
    with open(buildmeta_path) as f:
        buildmeta = json.load(f)

    # Fresh scratch dir; stale content from a previous run is discarded.
    tmpdir = 'tmp/compress'
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.mkdir(tmpdir)

    # Note we mutate the build dir in place, similarly to the buildextend
    # commands. One cool approach here might be to `cp -al` the whole build
    # dir, mutate it, then RENAME_EXCHANGE the two... though it doesn't seem
    # Python exposes it yet so that'd require some hacking around. For now, we
    # just guarantee that `compress` is idempotent and can resume from
    # failures.
    at_least_one = False
    only_artifacts = None
    if len(args.artifact) > 0:
        only_artifacts = set(args.artifact)
    skipped = []
    for img_format, img in buildmeta['images'].items():
        if img.get('skip-compression', False):
            skipped += [img_format]
            continue
        if only_artifacts is not None and img_format not in only_artifacts:
            continue
        # Per-platform override wins over the globally selected compressor
        compressor = platform_compressors.get(img_format) or args.compressor
        # Find what extension to use based on the selected compressor
        ext = ext_dict[compressor]
        file = img['path']
        filepath = os.path.join(builddir, file)
        if img.get('uncompressed-sha256') is None:
            tmpfile = os.path.join(tmpdir, (file + ext))
            # SHA256 + size for uncompressed image was already calculated
            # during 'build'
            img['uncompressed-sha256'] = img['sha256']
            img['uncompressed-size'] = img['size']
            with open(tmpfile, 'wb') as f:
                t = ncpu()
                if compressor == 'xz':
                    runcmd(['xz', '-c9', f'-T{t}', filepath], stdout=f)
                elif compressor == 'zstd':
                    runcmd(['zstd', '-10', '-c', f'-T{t}', filepath], stdout=f)
                elif compressor == 'gzip':
                    # gzip_level is a module-level global set by the
                    # top-level 'compress' driver (6, or 1 with --fast)
                    runcmd(['gzip', f'-{gzip_level}', '-c', filepath], stdout=f)  # pylint: disable=E0606
                elif compressor == 'zip':
                    runcmd(['zip', '-9j', '-', filepath], stdout=f)
                else:
                    raise Exception(f"Unknown compressor: {compressor}")
            file_with_ext = file + ext
            filepath_with_ext = filepath + ext
            compressed_size = os.path.getsize(tmpfile)
            img['path'] = file_with_ext
            img['sha256'] = sha256sum_file(tmpfile)
            img['size'] = compressed_size

            # Just flush out after every image type, but unlink after writing.
            # Then, we should be able to interrupt and restart from the last
            # type.
            shutil.move(tmpfile, filepath_with_ext)
            write_json(buildmeta_path, buildmeta)
            os.unlink(filepath)
            at_least_one = True
            print(f"Compressed: {file_with_ext}")
        else:
            # We've already compressed this in a previous pass; try to delete
            # the original uncompressed file if it's somehow still around.
            # Use strip_ext() rather than the old filepath[:-3], which only
            # handled 3-character extensions and silently produced a bogus
            # name (a no-op removal) for '.zst' and '.zip'.
            rm_allow_noent(strip_ext(filepath))
    if len(skipped) > 0:
        print(f"Skipped compressing artifacts: {' '.join(skipped)}")
    if at_least_one:
        print(f"Updated: {buildmeta_path}")
    return at_least_one
def uncompress_one_builddir(builddir):
    """Uncompress compressed image artifacts in one per-arch build dir.

    Inverse of compress_one_builddir(): restores path/sha256/size in
    meta.json from the recorded uncompressed-sha256/uncompressed-size and
    removes those keys. Integrity mismatches are reported on stdout but
    do not abort the run.

    :param builddir: path to the per-arch build directory
    :returns: True if at least one artifact was uncompressed
    """
    # heavily based on the compress_one_builddir
    print(f"Uncompressing: {builddir}")
    buildmeta_path = os.path.join(builddir, 'meta.json')
    with open(buildmeta_path) as f:
        buildmeta = json.load(f)

    # Fresh scratch dir; stale content from a previous run is discarded.
    tmpdir = 'tmp/uncompress'
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.mkdir(tmpdir)

    # Note we mutate the build dir in place, similarly to the buildextend
    # commands. One cool approach here might be to `cp -al` the whole build
    # dir, mutate it, then RENAME_EXCHANGE the two... though it doesn't seem
    # Python exposes it yet so that'd require some hacking around. For now, we
    # just guarantee that `compress` is idempotent and can resume from
    # failures.
    at_least_one = False
    only_artifacts = None
    if len(args.artifact) > 0:
        only_artifacts = set(args.artifact)
    skipped = []
    skipped_missing = []
    for img_format, img in buildmeta['images'].items():
        if img.get('skip-compression', False):
            skipped += [img_format]
            continue
        if only_artifacts is not None and img_format not in only_artifacts:
            continue
        file = img['path']
        filepath = os.path.join(builddir, file)
        if not os.path.exists(filepath) and only_artifacts is None:
            skipped_missing += [img_format]
            continue
        # uncompress only compressed files
        if file.endswith(tuple(ext_dict.values())):
            tmpfile = os.path.join(tmpdir, strip_ext(file))
            # SHA256 + size for uncompressed image was already calculated
            # during 'build'
            with open(tmpfile, 'wb') as f:
                if file.endswith('xz'):
                    t = ncpu()
                    runcmd(['xz', '-dc', f'-T{t}', filepath], stdout=f)
                elif file.endswith('zst'):
                    runcmd(['zstd', '-dc', filepath], stdout=f)
                elif file.endswith('gz'):
                    runcmd(['gzip', '-dc', filepath], stdout=f)
                elif file.endswith('zip'):
                    runcmd(['unzip', '-p', filepath], stdout=f)
                else:
                    # unreachable: the endswith() guard above only admits
                    # extensions from ext_dict
                    print(f"Unknown suffix of file {file}")
            file_without_ext = strip_ext(file)
            filepath_without_ext = strip_ext(filepath)
            uncompressed_size = os.path.getsize(tmpfile)
            uncompressed_sha256 = sha256sum_file(tmpfile)
            # Check integrity of the uncompressed images against the values
            # recorded at compression time; warn but keep going on mismatch.
            if uncompressed_sha256 != img['uncompressed-sha256']:
                print(f"sha256 mismatch for {filepath_without_ext}")
                print(f"{uncompressed_sha256}!={img['uncompressed-sha256']}")
            if uncompressed_size != img['uncompressed-size']:
                # Fixed: the original print lacked the f-prefix (it printed
                # the literal braces) and the detail line wrongly showed the
                # compressed img['size'] instead of the value compared.
                print(f"size mismatch for {filepath_without_ext}")
                print(f"{uncompressed_size}!={img['uncompressed-size']}")
            del img['uncompressed-sha256']
            del img['uncompressed-size']
            img['path'] = file_without_ext
            img['sha256'] = uncompressed_sha256
            img['size'] = uncompressed_size

            # Just flush out after every image type, but unlink after writing.
            # Then, we should be able to interrupt and restart from the last
            # type.
            shutil.move(tmpfile, filepath_without_ext)
            write_json(buildmeta_path, buildmeta)
            os.unlink(filepath)
            at_least_one = True
            print(f"Uncompressed: {file_without_ext}")
        else:
            # Already uncompressed; remove any stale compressed copy left by
            # an interrupted run. The old filepath[:-3] computed a garbage
            # name for 4-character extensions ('.zst', '.zip'), so check
            # every known extension explicitly.
            for stale_ext in ext_dict.values():
                rm_allow_noent(filepath + stale_ext)
    if len(skipped) > 0:
        print(f"Skipped uncompressing artifacts: {' '.join(skipped)}")
    if len(skipped_missing) > 0:
        print(f"Skipped missing artifacts: {' '.join(skipped_missing)}")
    if at_least_one:
        print(f"Updated: {buildmeta_path}")
    return at_least_one
def get_image_json():
    """Find a local arch build of the target and extract its image.json.

    Walks the architectures of the targeted build; the first one whose
    meta.json and ostree image artifact are both present locally is used
    to run import_ostree_commit(), which extracts tmp/image.json.

    :returns: the parsed image.json contents
    :raises Exception: when no architecture has the artifacts locally
    """
    workdir = os.getcwd()
    for basearch in builds.get_build_arches(build):
        arch_dir = builds.get_build_dir(build, basearch)
        if not os.path.exists(os.path.join(arch_dir, 'meta.json')):
            continue
        meta = GenericBuildMeta(workdir=workdir, build=build, basearch=basearch)
        ostree_path = os.path.join(arch_dir, meta['images']['ostree']['path'])
        if not os.path.exists(ostree_path):
            continue
        # runs extract_image_json(), which writes tmp/image.json
        import_ostree_commit(workdir, arch_dir, meta)
        with open(os.path.join(workdir, 'tmp/image.json')) as src:
            return json.load(src)
    # If we get here we were unsuccessful
    print("Could not find/extract image.json. Please pass --compressor\n" +
          "or make sure local ociarchive exists in the build directory.")
    raise Exception("Could not find/extract image.json")
# Per-arch results; used below to report when nothing needed doing.
changed = []
if args.mode == "compress":
    # Find what compressor we should use, either picking it up from
    # CLI args or from image.json
    image_json = get_image_json()
    # gzip_level is read as a module-level global by compress_one_builddir()
    gzip_level = 6
    if args.fast:
        args.compressor = 'gzip'
        gzip_level = 1
    elif not args.compressor:
        args.compressor = image_json.get('compressor', DEFAULT_COMPRESSOR)
    for arch in builds.get_build_arches(build):
        builddir = builds.get_build_dir(build, arch)
        changed.append(compress_one_builddir(builddir,
                       image_json.get('platform-compressor', {})))
    if not any(changed):
        print("All builds already compressed")
elif args.mode == "uncompress":
    for arch in builds.get_build_arches(build):
        builddir = builds.get_build_dir(build, arch)
        # Unlike compress (which skips via the meta.json check inside the
        # helper), a wholly absent per-arch dir is skipped here.
        if not os.path.exists(builddir):
            print(f"Skipping missing arch: {arch}")
            continue
        changed.append(uncompress_one_builddir(builddir))
    if not any(changed):
        print("All builds already uncompressed")