Skip to content

Commit

Permalink
first prefetch-buffer version
Browse files Browse the repository at this point in the history
  • Loading branch information
suchandler96 committed Nov 7, 2023
1 parent f1e88bb commit 903d7fe
Show file tree
Hide file tree
Showing 16 changed files with 287 additions and 69 deletions.
4 changes: 3 additions & 1 deletion bsc-util/nvdla_utilities/match_reg_trace_addr/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ def compute_remap_decision(self):
pass

def write_to_files(self):
    """Convert ``input.txn`` in ``self.out_dir`` into ``trace.bin`` via the NVDLA perl script.

    Invokes ``verif/verilator/input_txn_to_verilator.pl`` (located under
    ``self.nvdla_hw_path``) with the input and output paths as arguments.
    As before, the exit status of the script is not checked.

    Raises:
        OSError: if the ``perl`` executable cannot be launched.
    """
    # Local import so this method does not depend on the file's import block.
    import subprocess

    # Expand a leading "~" before making the path absolute; abspath() alone
    # would turn "~/nvdla/hw" into "<cwd>/~/nvdla/hw".
    hw_root = os.path.abspath(os.path.expanduser(self.nvdla_hw_path))
    script_path = os.path.join(hw_root, "verif/verilator/input_txn_to_verilator.pl")

    # The previous shell-string invocation let the shell expand a leading "~"
    # in these paths; expanduser() keeps that working without a shell.
    txn_path = os.path.expanduser(os.path.join(self.out_dir, "input.txn"))
    trace_path = os.path.expanduser(os.path.join(self.out_dir, "trace.bin"))

    # Argument list instead of a concatenated shell string: paths containing
    # spaces or shell metacharacters are passed through verbatim.
    subprocess.run(["perl", script_path, txn_path, trace_path])


class CVSRAMRemapper(BaseRemapper):
Expand Down
4 changes: 2 additions & 2 deletions bsc-util/nvdla_utilities/sweep/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ def main():

# dependencies
parser.add_argument(
"--nvdla-hw", default="/home/lactose/nvdla/hw/",
"--nvdla-hw", default="~/nvdla/hw/",
help="Path to NVDLA hw repo")
parser.add_argument(
"--disk-image", default="/home/lactose/gem5_linux_images/ubuntu-18.04-arm64-docker.img",
"--disk-image", default="~/gem5_linux_images/ubuntu-18.04-arm64-docker.img",
help="path to the disk image for full system simulation")
parser.add_argument(
"--gem5-binary", help="Path to the gem5 binary.")
Expand Down
97 changes: 96 additions & 1 deletion bsc-util/nvdla_utilities/sweep/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,58 @@ def next(self):
return True


class LittleCPUClockParam(BaseParam):
    """Sweep parameter bound to the ``--little-cpu-clock`` option of ``run.sh``."""

    def __init__(self, name, sweep_vals):
        BaseParam.__init__(self, name, sweep_vals)

    def apply(self, point_dir):
        """Pass the current sweep value to ``change_config_file`` for run.sh in ``point_dir``."""
        change_config_file(
            point_dir, "run.sh", {"little-cpu-clock": self.curr_sweep_value()})

    @classmethod
    def get(cls, point_dir):
        """Return the ``--little-cpu-clock`` value found in ``point_dir``/run.sh.

        Returns:
            The value following the flag as a string (e.g. ``"1GHz"``), or
            ``None`` when no line carries the flag followed by a value made of
            letters, digits or underscores (for instance when run.sh still
            holds the unsubstituted ``%(little-cpu-clock)s`` placeholder).

        Raises:
            AssertionError: if run.sh does not exist in ``point_dir``.
        """
        run_sh_path = os.path.join(point_dir, "run.sh")
        assert os.path.exists(run_sh_path)
        with open(run_sh_path, "r") as fp:
            for line in fp:
                # re.search returns None on a miss; check before .group()
                # instead of crashing with AttributeError.
                match = re.search(r"--little-cpu-clock\s+([0-9a-zA-Z\_]+)", line)
                if match is not None:
                    return match.group(1)
        return None

    @classmethod
    def default_value(cls):
        """Return the default list of sweep values."""
        return ["1GHz"]


class FreqRatioParam(BaseParam):
    """Sweep parameter bound to the ``--freq-ratio`` option of ``run.sh``."""

    def __init__(self, name, sweep_vals):
        BaseParam.__init__(self, name, sweep_vals)

    def apply(self, point_dir):
        """Pass the current sweep value to ``change_config_file`` for run.sh in ``point_dir``."""
        change_config_file(
            point_dir, "run.sh", {"freq-ratio": self.curr_sweep_value()})

    @classmethod
    def get(cls, point_dir):
        """Return the ``--freq-ratio`` value found in ``point_dir``/run.sh.

        Returns:
            The digits following the flag, as a string (not converted to int),
            or ``None`` when no line carries the flag followed by digits (for
            instance when run.sh still holds the unsubstituted
            ``%(freq-ratio)s`` placeholder).

        Raises:
            AssertionError: if run.sh does not exist in ``point_dir``.
        """
        run_sh_path = os.path.join(point_dir, "run.sh")
        assert os.path.exists(run_sh_path)
        with open(run_sh_path, "r") as fp:
            for line in fp:
                # re.search returns None on a miss; check before .group()
                # instead of crashing with AttributeError.
                match = re.search(r"--freq-ratio\s+([0-9]+)", line)
                if match is not None:
                    return match.group(1)
        return None

    @classmethod
    def default_value(cls):
        """Return the default list of sweep values."""
        return [1]


class DDRTypeParam(BaseParam):
def __init__(self, name, sweep_vals):
BaseParam.__init__(self, name, sweep_vals)
Expand All @@ -71,7 +123,7 @@ def get(self, point_dir):
pos = line.find("--ddr-type")
if pos == -1:
continue
return re.search(r"--ddr-type\s+([0-9a-zA-Z\_]+)").group(1)
return re.search(r"--ddr-type\s+([0-9a-zA-Z\_]+)", line).group(1)

@classmethod
def default_value(cls):
Expand Down Expand Up @@ -136,6 +188,44 @@ def default_value(cls):
return [""]


class BufferModeParam(BaseParam):
    """Sweep parameter bound to the ``--buffer-mode`` option of ``run.sh``."""

    def __init__(self, name, sweep_vals):
        BaseParam.__init__(self, name, sweep_vals)

    def apply(self, point_dir):
        """Pass the current sweep value to ``change_config_file`` for run.sh in ``point_dir``."""
        change_config_file(
            point_dir, "run.sh", {"buffer-mode": self.curr_sweep_value()})

    def is_meaningful(self, type_val_pairs):
        """Tell whether the current buffer mode is worth simulating.

        Args:
            type_val_pairs: mapping indexed by parameter class, giving the
                value currently selected for that parameter.

        Returns:
            ``False`` when the combination is redundant or invalid (see the
            inline comments), ``True`` otherwise.
        """
        if type_val_pairs[DMAEnableParam] != "--dma-enable" and \
                type_val_pairs[AddAccelPrivateCacheParam] != "--add-accel-private-cache" and \
                type_val_pairs[AddAccelSharedCacheParam] != "--add-accel-shared-cache" and \
                self.curr_sweep_value() != self._sweep_vals[0]:
            # different modes make no difference on membus and fakemem,
            # so only the first sweep value is kept in that case
            return False
        if type_val_pairs[PftEnableParam] != "--pft-enable" and self.curr_sweep_value() == "pft":
            # buffer mode "pft" is only meaningful when prefetch is enabled
            return False
        return True

    @classmethod
    def get(cls, point_dir):
        """Return the ``--buffer-mode`` value found in ``point_dir``/run.sh.

        Returns:
            The alphabetic value following the flag as a string, or ``None``
            when no line carries the flag followed by letters (for instance
            when run.sh still holds the unsubstituted ``%(buffer-mode)s``
            placeholder).

        Raises:
            AssertionError: if run.sh does not exist in ``point_dir``.
        """
        run_sh_path = os.path.join(point_dir, "run.sh")
        assert os.path.exists(run_sh_path)
        with open(run_sh_path, "r") as fp:
            for line in fp:
                # re.search returns None on a miss; check before .group()
                # instead of crashing with AttributeError.
                match = re.search(r"--buffer-mode\s+([a-zA-Z]+)", line)
                if match is not None:
                    return match.group(1)
        return None

    @classmethod
    def default_value(cls):
        """Return the default list of sweep values."""
        return ["all"]


class EmbedSPMSizeParam(BaseParam):
def __init__(self, name, sweep_vals):
BaseParam.__init__(self, name, sweep_vals)
Expand Down Expand Up @@ -846,6 +936,11 @@ def apply(self, point_dir):
change_config_file(
point_dir, "run.sh", {"pft-enable": self.curr_sweep_value()})

def is_meaningful(self, type_val_pairs):
    """Tell whether the current sweep value is worth simulating.

    Returns ``False`` only when the ``UseFakeMemParam`` entry of
    ``type_val_pairs`` is ``"--use-fake-mem"`` and the current sweep value
    differs from the first entry of ``self._sweep_vals``.
    """
    # Without fake memory every value of this parameter is kept.
    if type_val_pairs[UseFakeMemParam] != "--use-fake-mem":
        return True
    # With fake memory only the first sweep value is kept.
    return not (self.curr_sweep_value() != self._sweep_vals[0])

@classmethod
def get(self, point_dir):
run_sh_path = os.path.join(point_dir, "run.sh")
Expand Down
5 changes: 4 additions & 1 deletion bsc-util/nvdla_utilities/sweep/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
%(gem5-binary)s \
-d %(output-dir)s \
%(config-dir)s --big-cpus 0 --little-cpus 1 --last-cache-level 2 --caches --accelerators \
--little-cpu-clock %(little-cpu-clock)s \
--freq-ratio %(freq-ratio)s \
--numNVDLA %(numNVDLA)s \
--maxReqNVDLA 1000 --enableTimingAXI \
--restore-from %(cpt-dir)s \
--bootscript=bootscript.rcS \
--ddr-type %(ddr-type)s \
--buffer-mode %(buffer-mode)s \
%(dma-enable)s \
%(shared-spm)s \
--embed-spm-size %(embed-spm-size)s \
--accel-embed-spm-lat %(accel-embed-spm-lat)s \
%(add-accel-private-cache)s \
Expand All @@ -33,7 +37,6 @@
%(pft-enable)s \
--pft-threshold %(pft-threshold)s \
%(use-fake-mem)s \
%(shared-spm)s \
%(cvsram-enable)s \
--cvsram-size %(cvsram-size)s \
--cvsram-bandwidth %(cvsram-bandwidth)s \
Expand Down
27 changes: 19 additions & 8 deletions bsc-util/nvdla_utilities/sweep/sweeper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
from params import *

param_types = {
"little-cpu-clock": LittleCPUClockParam,
"freq-ratio": FreqRatioParam,
"ddr-type": DDRTypeParam,
"numNVDLA": NumNVDLAParam,
"buffer-mode": BufferModeParam,
"dma-enable": DMAEnableParam,
"shared-spm": SharedSPMParam,
"embed-spm-size": EmbedSPMSizeParam,
Expand Down Expand Up @@ -51,6 +54,7 @@

class Sweeper:
def __init__(self, args):
self.home_path = os.popen("cd ~/ && pwd").readlines()[0].strip('\n')
self.gem5_nvdla_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
self.gen_points = args.gen_points
self.cpt_dir = None
Expand All @@ -62,6 +66,7 @@ def __init__(self, args):
os.makedirs(args.out_dir, exist_ok=True)
# create subdirectory 'traces' in case CVSRAM Remapper changes trace.bin

self.disk_image = args.disk_image
self.template_dir = os.path.dirname(os.path.abspath(__file__))
self.gem5_binary = args.gem5_binary
self.sim_dir = args.sim_dir
Expand All @@ -75,10 +80,6 @@ def __init__(self, args):
self.mappers = {}
self.mapper_comps = [] # [(mapper_path, [shell_cmd])]: each is a testcase that requires remapping computation

if not os.path.exists(os.path.join(self.gem5_nvdla_dir, "mnt/home")):
os.system("cd " + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) + " && "
"sudo python3 util/gem5img.py mount " + args.disk_image + " ./mnt")

for root, dirs, files in os.walk(args.jsons_dir):
is_valid_dir = False
for file in files:
Expand Down Expand Up @@ -208,12 +209,13 @@ def _create_point(self, json_id):
assert False

# cpt-dir should be changed after regenerating a checkpoint
change_config_file(point_dir, "run.sh", {"gem5-binary": self.gem5_binary})
change_config_file(point_dir, "run.sh", {"gem5-binary": self.gem5_binary.replace(self.home_path, "~")})

change_config_file(point_dir, "run.sh", {"output-dir": os.path.abspath(point_dir)})
change_config_file(point_dir, "run.sh", {"output-dir": os.path.abspath(point_dir).replace(self.home_path, "~")})
change_config_file(point_dir, "bootscript.rcS", {"run-cmd": run_cmd})
change_config_file(point_dir, "run.sh", {"config-dir":
os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../configs/example/arm/fs_bigLITTLE_RTL.py"))})
os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../configs/example/arm/fs_bigLITTLE_RTL.py"))
.replace(self.home_path, "~")})

# Apply every sweep parameter for this data point.
for p in self.params_list[json_id][0]:
Expand All @@ -230,6 +232,10 @@ def parallel_remap_compute(self):
pool.join()

def resume_create_point(self):
if not os.path.exists(os.path.join(self.gem5_nvdla_dir, "mnt/home")):
os.system("cd " + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) +
" && sudo python3 util/gem5img.py mount " + self.disk_image + " ./mnt")

for dump_mapper_path, _ in self.mapper_comps:
with open(dump_mapper_path, 'rb') as mapper_file:
mapper = pickle.load(mapper_file)
Expand Down Expand Up @@ -262,7 +268,8 @@ def enumerate(self, param_idx, json_id):

def enumerate_all(self):
"""Create configurations for all data points. """
print("Creating all data points...")
if self.gen_points:
print("Creating all data points...")
for json_id in range(len(self.params_list)):
self.enumerate(0, json_id)

Expand All @@ -283,6 +290,10 @@ def enumerate_all(self):
for pt_dir in self.pt_dirs:
change_config_file(pt_dir, "run.sh", {"cpt-dir": self.cpt_dir})

esc_home_path = self.home_path.replace('/', '\\/')
esc_out_dir = self.out_dir.replace('/', '\\/')
os.system('sed -i "s/' + esc_home_path + '/~/g" `grep "' + esc_home_path + '" -rl ' + esc_out_dir + '`')

def run_all(self, args):
"""Run simulations for all data points.
Expand Down
28 changes: 24 additions & 4 deletions configs/example/arm/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,28 @@ def addPrivateAccelerator(self, system, clk_domain, membus, options):

cpu.num_accels = options.numNVDLA

sft_pft_ctrl_str = "prefetch_enable=1, pft_threshold=options.pft_threshold"\
if options.pft_enable else "prefetch_enable=0"
if options.buffer_mode == "all":
pft_ctrl_str = "buffer_mode=0"
elif options.buffer_mode == "pft":
pft_ctrl_str = "buffer_mode=1"
else:
assert False

if options.pft_enable:
pft_ctrl_str += ", prefetch_enable=1, pft_threshold=options.pft_threshold"
if options.dma_enable:
if not options.shared_spm: # regular private SPM
pft_ctrl_str += ", pft_buf_size=options.embed_spm_size"
else:
pft_ctrl_str += ", pft_buf_size=options.embed_spm_size / options.numNVDLA"
elif options.add_accel_private_cache: # both private cache-only and mixed private & shared cache
pft_ctrl_str += ", pft_buf_size=options.accel_pr_cache_size"
elif options.add_accel_shared_cache:
pft_ctrl_str += ", pft_buf_size=options.accel_sh_cache_size"
else: # membus
pass
else:
pft_ctrl_str += ", prefetch_enable=0"

# in the current phase, we only use one NVDLA accelerator, and spm cannot be used with caches
if options.dma_enable:
Expand All @@ -175,10 +195,10 @@ def addPrivateAccelerator(self, system, clk_domain, membus, options):
else:
dma_ctrl_str = "dma_enable=0"

fakemem_ctrl_str = "use_fake_mem=options.use_fake_mem"
fakemem_ctrl_str = "use_fake_mem=options.use_fake_mem, freq_ratio=options.freq_ratio"

for i in range(4):
exec("cpu.accel_%d = rtlNVDLA(%s, %s, %s)" % (i, dma_ctrl_str, sft_pft_ctrl_str, fakemem_ctrl_str))
exec("cpu.accel_%d = rtlNVDLA(%s, %s, %s)" % (i, dma_ctrl_str, pft_ctrl_str, fakemem_ctrl_str))

for i in range(4):
exec("cpu.accel_port_%d = cpu.accel_%d.cpu_side" % (i, i))
Expand Down
8 changes: 6 additions & 2 deletions configs/example/arm/fs_bigLITTLE_RTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,19 @@ def addOptions(parser):

# options.numNVDLA
parser.add_argument("--numNVDLA", type=int, default=1, help="number of NVDLAs")
# options.freq_ratio
parser.add_argument("--freq-ratio", type=int, default=1, help="=(frequency of LITTLE CPU) / (frequency of NVDLA)")

# options.buffer_mode
parser.add_argument("--buffer-mode", type=str, default="all", help="How to use pr/sh cache/embedded-SPM. all: cache all; pft: prefetch-buffer-only")
# options.dma_enable
parser.add_argument("--dma-enable", action="store_true", default=False, help="Use scratchpad embedded in NVDLA wrapper, aided with DMA")
# options.shared_spm
parser.add_argument("--shared-spm", action="store_true", default=False, help="change embedded SPM to shared")
# options.embed_spm_size
parser.add_argument("--embed-spm-size", type=str, default="64kB", help="specify private SPM size for accelerators (embedded SPM)")
# options.accel_embed_spm_lat
parser.add_argument("--accel-embed-spm-lat", type=int, default=12, help="specify private SPM latency for accelerators (embedded SPM)")
# options
parser.add_argument("--shared-spm", action="store_true", default=False, help="change embedded SPM to shared")


# options.cvsram_enable
Expand Down
Loading

0 comments on commit 903d7fe

Please sign in to comment.