Skip to content

Commit

Permalink
Merge pull request GoogleContainerTools#121 from xingao267/master
Browse files Browse the repository at this point in the history
Support passing complete url of Packages.gz file in dpkg_parser.
  • Loading branch information
r2d4 authored Oct 26, 2017
2 parents f3bb1f3 + 77cfa2f commit 872f43c
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 20 deletions.
43 changes: 30 additions & 13 deletions package_manager/dpkg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,18 @@
help='The snapshot date to download')
parser.add_argument("--sha256", action='store',
help='The sha256 checksum to validate for the Packages.gz file')
parser.add_argument("--packages-gz-url", action='store',
help='The full url for the Packages.gz file')
parser.add_argument("--package-prefix", action='store',
help='The prefix to prepend to the value of Filename key in the Packages.gz file.')


def main():
""" A tool for downloading debian packages and package metadata """
args = parser.parse_args()
if args.download_and_extract_only:
download_package_list(args.mirror_url, args.distro, args.arch, args.snapshot, args.sha256)
download_package_list(args.mirror_url,args.distro, args.arch, args.snapshot, args.sha256,
args.packages_gz_url, args.package_prefix)
util.build_os_release_tar(args.distro, OS_RELEASE_FILE_NAME, OS_RELEASE_PATH, OS_RELEASE_TAR_FILE_NAME)
else:
download_dpkg(args.package_files, args.packages, args.workspace_name)
Expand All @@ -85,7 +91,7 @@ def download_dpkg(package_files, packages, workspace_name):
for package_file in package_files.split(","):
with open(package_file, 'rb') as f:
metadata = json.load(f)
if (pkg_name in metadata and
if (pkg_name in metadata and
(pkg_version == "" or
pkg_version == metadata[pkg_name][VERSION_KEY])):
pkg = metadata[pkg_name]
Expand All @@ -100,10 +106,10 @@ def download_dpkg(package_files, packages, workspace_name):
raise Exception("Wrong checksum for package %s. Expected: %s, Actual: %s", pkg_name, expected_checksum, actual_checksum)
if pkg_version == "":
break
if (pkg_vals in pkg_vals_to_package_file_and_sha256 and
if (pkg_vals in pkg_vals_to_package_file_and_sha256 and
pkg_vals_to_package_file_and_sha256[pkg_vals][1] != actual_checksum):
raise Exception("Conflicting checksums for package %s, version %s. Conflicting checksums: %s:%s, %s:%s",
pkg_name, pkg_version,
raise Exception("Conflicting checksums for package %s, version %s. Conflicting checksums: %s:%s, %s:%s",
pkg_name, pkg_version,
pkg_vals_to_package_file_and_sha256[pkg_vals][0], pkg_vals_to_package_file_and_sha256[pkg_vals][1],
package_file, actual_checksum)
else:
Expand All @@ -114,7 +120,7 @@ def download_dpkg(package_files, packages, workspace_name):
with open(PACKAGE_MAP_FILE_NAME, 'w') as f:
f.write("packages = " + json.dumps(package_to_rule_map))

def download_package_list(mirror_url, distro, arch, snapshot, sha256):
def download_package_list(mirror_url, distro, arch, snapshot, sha256, packages_gz_url, package_prefix):
"""Downloads a debian package list, expands the relative urls,
and saves the metadata as a json file
Expand Down Expand Up @@ -144,12 +150,23 @@ def download_package_list(mirror_url, distro, arch, snapshot, sha256):
SHA256: 52ec3ac93cf8ba038fbcefe1e78f26ca1d59356cdc95e60f987c3f52b3f5e7ef
"""
url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.gz" % (
mirror_url,
snapshot,
distro,
arch
)

if bool(packages_gz_url) != bool(package_prefix):
raise Exception("packages_gz_url and package_prefix must be specified or skipped at the same time.")

if (not packages_gz_url) and (not mirror_url or not snapshot or not distro or not arch):
raise Exception("If packages_gz_url is not specified, all of mirror_url, snapshot, "
"distro and arch must be specified.")

url = packages_gz_url
if not url:
url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.gz" % (
mirror_url,
snapshot,
distro,
arch
)

buf = urllib2.urlopen(url)
with open("Packages.gz", 'w') as f:
f.write(buf.read())
Expand All @@ -158,7 +175,7 @@ def download_package_list(mirror_url, distro, arch, snapshot, sha256):
raise Exception("sha256 of Packages.gz don't match: Expected: %s, Actual:%s" %(sha256, actual_sha256))
with gzip.open("Packages.gz", 'rb') as f:
data = f.read()
metadata = parse_package_metadata(data, mirror_url, snapshot)
metadata = parse_package_metadata(data, mirror_url, snapshot, package_prefix)
with open(PACKAGES_FILE_NAME, 'w') as f:
json.dump(metadata, f)

Expand Down
7 changes: 5 additions & 2 deletions package_manager/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
FILENAME_KEY = "Filename"
SEPARATOR = ":"

def parse_package_metadata(data, mirror_url, snapshot):
def parse_package_metadata(data, mirror_url, snapshot, package_prefix):
""" Takes a debian package list, changes the relative urls to absolute urls,
and returns the resulting metadata as a dict """
raw_entries = [line.rstrip() for line in data.splitlines()]
Expand Down Expand Up @@ -54,5 +54,8 @@ def parse_package_metadata(data, mirror_url, snapshot):
# Here, we're rewriting the metadata with the absolute urls: package_prefix +
# relative_path when a package prefix is given, otherwise
# mirror + '/debian/' + snapshot + '/' + relative_path
for pkg_data in parsed_entries.itervalues():
pkg_data[FILENAME_KEY] = mirror_url + "/debian/" + snapshot + "/" + pkg_data[FILENAME_KEY]
if package_prefix:
pkg_data[FILENAME_KEY] = package_prefix + pkg_data[FILENAME_KEY]
else:
pkg_data[FILENAME_KEY] = mirror_url + "/debian/" + snapshot + "/" + pkg_data[FILENAME_KEY]
return parsed_entries
18 changes: 13 additions & 5 deletions package_manager/parse_metadata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@ def setUp(self):
data = f.read()
self.data = data
self.mirror_url = "http://debian.org"
self.metadata = parse_package_metadata(self.data, self.mirror_url, "20170701")
self.package_prefix = "http://dummy/prefix/"
self.debian_repo_metadata = parse_package_metadata(self.data, self.mirror_url, "20170701", "")
self.arbitrary_repo_metadata = parse_package_metadata(self.data, "", "", self.package_prefix)

def test_url_rewrite(self):
def test_debian_repo_url_rewrite(self):
""" Relative url should have gotten rewritten with absolute url """
self.assertEqual(
self.metadata["libnewlib-dev"]["Filename"],
self.debian_repo_metadata["libnewlib-dev"]["Filename"],
self.mirror_url + "/debian/20170701/" + "pool/main/n/newlib/libnewlib-dev_2.1.0+git20140818.1a8323b-2_all.deb")

def test_arbitrary_repo_url_rewrite(self):
""" Relative url should have gotten rewritten with absolute url using the given package prefix """
self.assertEqual(
self.arbitrary_repo_metadata["libnewlib-dev"]["Filename"],
self.package_prefix + "pool/main/n/newlib/libnewlib-dev_2.1.0+git20140818.1a8323b-2_all.deb")

def test_get_all_packages(self):
""" Parser should identify all packages """
expected_packages = ["libnewlib-dev",
Expand All @@ -28,12 +36,12 @@ def test_get_all_packages(self):
"newmail",
"zzuf",]
for _ in expected_packages:
self.assertEqual(expected_packages.sort(), self.metadata.keys().sort())
self.assertEqual(expected_packages.sort(), self.debian_repo_metadata.keys().sort())

def test_multiline_key(self):
""" Multiline keys should be properly parsed """
expected_tags = "interface::commandline, mail::notification, role::program,scope::utility, works-with::mail"
self.assertEqual(expected_tags, self.metadata["newmail"]["Tag"])
self.assertEqual(expected_tags, self.debian_repo_metadata["newmail"]["Tag"])

if __name__ == '__main__':

Expand Down

0 comments on commit 872f43c

Please sign in to comment.