Skip to content

Commit

Permalink
Extract and improve script to download CUDA toolkit components.
Browse files Browse the repository at this point in the history
Previously, we fetched a sample script from an NVIDIA GitHub repo and drove it from CMake scripting to download an appropriate SDK. This patch:

* Forks the parse_redist.py sample locally into third_party/nvidia_sdk_download.
* Fixes a number of things in parse_redist.py to make it more robust, remove warnings, and eliminate the dependency on the Python requests package.
* Removes the 'requests' package from all requirements files as it is no longer needed.
* Adds a fetch_cuda_toolkit.py which duplicates the behavior that was open coded in CMake scripting.
* Updates the build_tools/third_party/cuda/CMakeLists.txt to use the new script instead of its internal approach.

In a follow-on, I will use this script on the Bazel side to make it auto-fetch the CUDA SDK as needed as well.
  • Loading branch information
Stella Laurenzo committed Mar 9, 2023
1 parent 6bc4084 commit d1a65a7
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 93 deletions.
83 changes: 11 additions & 72 deletions build_tools/third_party/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,79 +5,18 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

function(fetch_cuda_toolkit)
# Parameters to the download script.
# Look for an appropriate redistrib_*.json here to verify:
# https://developer.download.nvidia.com/compute/cuda/redist/
set(_VERSION "11.6.2")
set(_PRODUCT "cuda")
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(_OS "linux")
elseif(WIN32)
set(_OS "windows")
else()
message(SEND_ERROR "Unsupported OS environment. Must be Windows or Linux.")
return()
# Delegate the download to the in-tree helper script. The script is given the
# current binary dir as its output root and its stdout is captured as the path
# of the directory it produced.
set(_DOWNLOAD_SCRIPT_PATH "${IREE_SOURCE_DIR}/third_party/nvidia_sdk_download/fetch_cuda_toolkit.py")
message(STATUS "Checking and downloading CUDA SDK toolkit components")
execute_process(
  COMMAND "${Python3_EXECUTABLE}"
    "${_DOWNLOAD_SCRIPT_PATH}" "${CMAKE_CURRENT_BINARY_DIR}"
  RESULT_VARIABLE _EXEC_RESULT
  OUTPUT_VARIABLE _ACTUAL_DOWNLOAD_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
# RESULT_VARIABLE holds the exit code, or an error string when the process
# could not be run at all; anything that is not the number 0 is a failure.
if(NOT _EXEC_RESULT EQUAL 0)
  message(FATAL_ERROR "Error fetching CUDA toolkit (result: ${_EXEC_RESULT})")
endif()
# The captured stdout becomes CUDAToolkit_ROOT, so make sure it really names a
# directory before handing it on.
if(NOT IS_DIRECTORY "${_ACTUAL_DOWNLOAD_PATH}")
  message(FATAL_ERROR "Error fetching CUDA toolkit: download script did not report a valid directory: '${_ACTUAL_DOWNLOAD_PATH}'")
endif()
# CUDA is only supported on Linux/Windows where x64 is the only arch for now.
# Note: CMAKE_HOST_SYSTEM_PROCESSOR may be AMD64 on Windows, but we still
# want to use `x86_64` here.
set(_ARCH "x86_64")

set(_TARGET_DIR "${CMAKE_CURRENT_BINARY_DIR}/${_VERSION}")
set(_DOWNLOAD_SCRIPT_URL "https://raw.githubusercontent.com/NVIDIA/build-system-archive-import-examples/44dfb51fad75a8a2f1044a4fe221aba70571b86f/parse_redist.py")
set(_DOWNLOAD_SCRIPT_PATH "${_TARGET_DIR}/parse_redist.py")

# Only download if haven't already.
# This will produce a unified directory tree under:
# flat/$OS-$ARCH
set(_ARCH_DIR "${_TARGET_DIR}/${_OS}-${_ARCH}")
set(_TOUCH_FILE "${_TARGET_DIR}/cuda_toolkit.downloaded")

if(NOT EXISTS "${_TOUCH_FILE}")
# The parse_redist.py script requires the Python requests module, which
# is not yet installed by default. Check for it.
execute_process(
COMMAND ${Python3_EXECUTABLE} -c "import requests"
RESULT_VARIABLE _PY_MODULES_EXIST_CODE
OUTPUT_QUIET
)
if(NOT ${_PY_MODULES_EXIST_CODE} EQUAL 0)
message(SEND_ERROR "CUDA auto-download requires Python packages that do not exist on your system. Recommend running: \n ${Python3_EXECUTABLE} -m pip install requests")
return()
endif()

# Components that we need to fetch.
set(_COMPONENTS_TO_FETCH "")
list(APPEND _COMPONENTS_TO_FETCH "cuda_nvcc")
list(APPEND _COMPONENTS_TO_FETCH "cuda_cudart")

message(STATUS "Extracting CUDA Toolkit to ${_TARGET_DIR}")
file(MAKE_DIRECTORY ${_TARGET_DIR})

# First fetch the download script to its own directory.
file(DOWNLOAD ${_DOWNLOAD_SCRIPT_URL} ${_DOWNLOAD_SCRIPT_PATH})

# Then use the download script to fetch and flatten each component we want
# into the target dir.
foreach(COMPONENT ${_COMPONENTS_TO_FETCH})
message(STATUS "Downloading component ${COMPONENT}")
execute_process(COMMAND ${Python3_EXECUTABLE} "${_DOWNLOAD_SCRIPT_PATH}"
--label "${_VERSION}"
--product "${_PRODUCT}"
--os "${_OS}"
--arch "${_ARCH}"
--component "${COMPONENT}"
--output "${_TARGET_DIR}")
endforeach()
endif()

if(NOT EXISTS "${_ARCH_DIR}")
message(FATAL_ERROR "Download did not produce expected source dir: ${_ARCH_DIR}")
return()
endif()

file(TOUCH "${_TOUCH_FILE}")
set(CUDAToolkit_ROOT "${_ARCH_DIR}" PARENT_SCOPE)
set(CUDAToolkit_ROOT ${_ACTUAL_DOWNLOAD_PATH} PARENT_SCOPE)
endfunction()

if(DEFINED ENV{IREE_CUDA_DEPS_DIR})
Expand Down
1 change: 0 additions & 1 deletion compiler/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@ requires = [
"packaging",
"pybind11>=2.10.1",
"PyYAML",
"requests",
]
build-backend = "setuptools.build_meta"
1 change: 0 additions & 1 deletion runtime/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,5 @@ requires = [
"packaging",
"pybind11>=2.10.1",
"PyYAML",
"requests",
]
build-backend = "setuptools.build_meta"
86 changes: 86 additions & 0 deletions third_party/nvidia_sdk_download/fetch_cuda_toolkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Fetches components of the CUDA toolkit that we need to build.
Syntax:
fetch_cuda_toolkit.py {output_dir}
This will download an appropriate toolkit (subset) and print the full path
to the resulting directory (which will be a sub-directory of the output_dir).
"""

from pathlib import Path
import platform
import shutil
import subprocess
import sys

VERSION = "11.6.2"
PRODUCT = "cuda"
COMPONENTS = ["cuda_nvcc", "cuda_cudart"]


def main(output_dir: Path):
  """Downloads the required CUDA toolkit components beneath `output_dir`.

  Each entry of COMPONENTS is fetched by running the sibling parse_redist.py
  script once. A touch file inside the per-platform directory marks a
  completed download so that later invocations skip the fetch.

  The per-platform directory is the only thing written to stdout (callers
  capture stdout to learn the path); every diagnostic, including the output
  of parse_redist.py, goes to stderr.

  Args:
    output_dir: Directory under which `<VERSION>/<os>-<arch>` is created.

  Raises:
    SystemExit: With code 1 when the host OS is neither Linux nor Windows.
    subprocess.CalledProcessError: When parse_redist.py exits non-zero.
  """
  system = platform.system()
  if system == "Linux":
    os_name = "linux"
  elif system == "Windows":
    os_name = "windows"
  else:
    # Must go to stderr: stdout is reserved for the resulting path.
    print("ERROR: Fetching CUDA toolkit only supported on windows and linux",
          file=sys.stderr)
    sys.exit(1)

  arch = platform.machine()
  if arch == "AMD64":
    # Windows spelling of the architecture; the download uses x86_64.
    arch = "x86_64"

  target_dir = output_dir / VERSION
  arch_dir = target_dir / f"{os_name}-{arch}"
  touch_file = arch_dir / "cuda_toolkit.downloaded"
  if touch_file.exists():
    print(f"Not downloading because touch file exists: {touch_file}",
          file=sys.stderr)
  else:
    # Remove and create arch dir so a partial earlier download is discarded.
    if arch_dir.exists():
      shutil.rmtree(arch_dir)
    arch_dir.mkdir(parents=True, exist_ok=True)

    parse_redist = Path(__file__).resolve().parent / "parse_redist.py"
    for component in COMPONENTS:
      print(f"Downloading component {component}", file=sys.stderr)
      # Paths are passed as str: path-like entries in an argument list are
      # only accepted on Windows from Python 3.8 onwards.
      subprocess.check_call([
          sys.executable,
          str(parse_redist),
          "--label",
          VERSION,
          "--product",
          PRODUCT,
          "--os",
          os_name,
          "--arch",
          arch,
          "--component",
          component,
          "--output",
          str(target_dir),
      ],
                            cwd=str(target_dir),
                            stdout=sys.stderr)

    # Touch the file to note done.
    touch_file.touch()

  # Report back.
  print(arch_dir)


if __name__ == "__main__":
  # Exactly one positional argument is accepted: the output directory.
  cli_args = sys.argv[1:]
  if len(cli_args) != 1:
    print("ERROR: Expected output_dir", file=sys.stderr)
    sys.exit(1)
  main(Path(cli_args[0]))
70 changes: 51 additions & 19 deletions third_party/nvidia_sdk_download/parse_redist.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@
2. Validates SHA256 checksums
3. Extracts archives
4. Flattens into a collapsed directory structure
Forked from https://github.com/NVIDIA/build-system-archive-import-examples/blob/355e25cca11725e88984443a6a343dffeb43308a/parse_redist.py
and patched:
- avoid a dependency on the non-standard requests package (see
the http_get helper) by using urllib directly
- explicit error handling on hash mismatch
- always download, even if files exist
- remove dependence on deprecated distutils copy_tree in favor of shutil
"""
from distutils.dir_util import copy_tree
import argparse
import os.path
import hashlib
Expand All @@ -18,7 +25,7 @@
import tarfile
import zipfile
import sys
import requests
import urllib.request

__version__ = "0.1.0"

Expand All @@ -40,6 +47,21 @@
COLLAPSE = True


def http_get(url):
    """Fetch the contents of a URL.

    Args:
        url: URL to fetch; passed to urllib.request.urlopen.

    Returns:
        The response body as returned by the response's read().

    Raises:
        IOError: If the response reports a status code other than 200.
    """
    with urllib.request.urlopen(url) as f:
        data = f.read()
        if hasattr(f, "status"):
            # For >= 3.9
            status_code = f.status
        else:
            # Deprecated in 3.9
            status_code = f.code
    if status_code != 200:
        raise IOError(" -> Failed to download: " + url)
    return data


def err(msg):
"""Print error message and exit"""
print("ERROR: " + msg)
Expand All @@ -48,14 +70,11 @@ def err(msg):

def fetch_file(full_path, filename):
"""Download file to disk"""
download = requests.get(full_path)
if download.status_code != 200:
print(" -> Failed: " + filename)
else:
print(":: Fetching: " + full_path)
with open(filename, "wb") as file:
file.write(download.content)
print(" -> Wrote: " + filename)
print(":: Fetching: " + full_path)
download_data = http_get(full_path)
with open(filename, "wb") as file:
file.write(download_data)
print(" -> Wrote: " + filename)


def get_hash(filename):
Expand All @@ -77,18 +96,31 @@ def check_hash(filename, checksum):
if checksum == sha256:
print(" Verified sha256sum: " + sha256)
else:
print(" => Mismatch sha256sum:")
print(" -> Calculation: " + sha256)
print(" -> Expectation: " + checksum)
raise IOError(f"Mismatch sha256sum: Calculation={sha256}, "
f"Expectation={checksum} for {filename}")


def flatten_tree(src, dest):
"""Merge hierarchy from multiple directories"""

# Should use shutil.copytree(dirs_exist_ok=True), but that isn't available
# until Python 3.8.
def copytree(src, dst):
if not os.path.exists(dst):
os.makedirs(dst)
for item in os.listdir(src):
s = os.path.join(src, item)
d = os.path.join(dst, item)
if os.path.isdir(s):
copytree(s, d)
else:
if not os.path.exists(d):
shutil.copy2(s, d)

try:
copy_tree(src, dest, preserve_symlinks=1, update=1, verbose=1)
except FileExistsError:
pass
shutil.rmtree(src)
copytree(src, dest)
finally:
shutil.rmtree(src)


def fetch_action(parent):
Expand Down Expand Up @@ -116,7 +148,7 @@ def fetch_action(parent):
filename = os.path.basename(full_path)
ARCHIVES[platform].append(filename)

if RETRIEVE and not os.path.exists(filename):
if RETRIEVE:
# Download archive
fetch_file(full_path, filename)
elif os.path.exists(filename):
Expand Down Expand Up @@ -257,7 +289,7 @@ def post_action():

# Parse JSON
try:
MANIFEST = requests.get(URL).json()
MANIFEST = json.loads(http_get(URL))
except json.decoder.JSONDecodeError:
err("redistrib JSON manifest file not found")

Expand Down

0 comments on commit d1a65a7

Please sign in to comment.