first prefetch-buffer version

suchandler96 · Nov 7, 2023 · 903d7fe · 903d7fe
1 parent f1e88bb
commit 903d7fe
Show file tree

Hide file tree

Showing 16 changed files with 287 additions and 69 deletions.
diff --git a/bsc-util/nvdla_utilities/match_reg_trace_addr/remap.py b/bsc-util/nvdla_utilities/match_reg_trace_addr/remap.py
@@ -69,7 +69,9 @@ def compute_remap_decision(self):
         pass
 
     def write_to_files(self):
-        pass
+        script_path = os.path.join(os.path.abspath(self.nvdla_hw_path), "verif/verilator/input_txn_to_verilator.pl")
+        os.system("perl " + script_path + " " + os.path.join(self.out_dir, "input.txn") + " " +
+                  os.path.join(self.out_dir, "trace.bin"))
 
 
 class CVSRAMRemapper(BaseRemapper):

diff --git a/bsc-util/nvdla_utilities/sweep/main.py b/bsc-util/nvdla_utilities/sweep/main.py
@@ -20,10 +20,10 @@ def main():
 
     # dependencies
     parser.add_argument(
-        "--nvdla-hw", default="/home/lactose/nvdla/hw/",
+        "--nvdla-hw", default="~/nvdla/hw/",
         help="Path to NVDLA hw repo")
     parser.add_argument(
-        "--disk-image", default="/home/lactose/gem5_linux_images/ubuntu-18.04-arm64-docker.img",
+        "--disk-image", default="~/gem5_linux_images/ubuntu-18.04-arm64-docker.img",
         help="path to the disk image for full system simulation")
     parser.add_argument(
         "--gem5-binary", help="Path to the gem5 binary.")

diff --git a/bsc-util/nvdla_utilities/sweep/params.py b/bsc-util/nvdla_utilities/sweep/params.py
@@ -52,6 +52,58 @@ def next(self):
         return True
 
 
+class LittleCPUClockParam(BaseParam):
+    def __init__(self, name, sweep_vals):
+        BaseParam.__init__(self, name, sweep_vals)
+
+    def apply(self, point_dir):
+        change_config_file(
+            point_dir, "run.sh", {"little-cpu-clock": self.curr_sweep_value()})
+
+    @classmethod
+    def get(self, point_dir):
+        run_sh_path = os.path.join(point_dir, "run.sh")
+        assert os.path.exists(run_sh_path)
+        with open(run_sh_path, "r") as fp:
+            run_sh_lines = fp.readlines()
+
+        for line in run_sh_lines:
+            pos = line.find("--little-cpu-clock")
+            if pos == -1:
+                continue
+            return re.search(r"--little-cpu-clock\s+([0-9a-zA-Z\_]+)", line).group(1)
+
+    @classmethod
+    def default_value(cls):
+        return ["1GHz"]
+
+
+class FreqRatioParam(BaseParam):
+    def __init__(self, name, sweep_vals):
+        BaseParam.__init__(self, name, sweep_vals)
+
+    def apply(self, point_dir):
+        change_config_file(
+            point_dir, "run.sh", {"freq-ratio": self.curr_sweep_value()})
+
+    @classmethod
+    def get(self, point_dir):
+        run_sh_path = os.path.join(point_dir, "run.sh")
+        assert os.path.exists(run_sh_path)
+        with open(run_sh_path, "r") as fp:
+            run_sh_lines = fp.readlines()
+
+        for line in run_sh_lines:
+            pos = line.find("--freq-ratio")
+            if pos == -1:
+                continue
+            return re.search(r"--freq-ratio\s+([0-9]+)", line).group(1)
+
+    @classmethod
+    def default_value(cls):
+        return [1]
+
+
 class DDRTypeParam(BaseParam):
     def __init__(self, name, sweep_vals):
         BaseParam.__init__(self, name, sweep_vals)
@@ -71,7 +123,7 @@ def get(self, point_dir):
             pos = line.find("--ddr-type")
             if pos == -1:
                 continue
-            return re.search(r"--ddr-type\s+([0-9a-zA-Z\_]+)").group(1)
+            return re.search(r"--ddr-type\s+([0-9a-zA-Z\_]+)", line).group(1)
 
     @classmethod
     def default_value(cls):
@@ -136,6 +188,44 @@ def default_value(cls):
         return [""]
 
 
+class BufferModeParam(BaseParam):
+    def __init__(self, name, sweep_vals):
+        BaseParam.__init__(self, name, sweep_vals)
+
+    def apply(self, point_dir):
+        change_config_file(
+            point_dir, "run.sh", {"buffer-mode": self.curr_sweep_value()})
+
+    def is_meaningful(self, type_val_pairs):
+        if type_val_pairs[DMAEnableParam] != "--dma-enable" and \
+            type_val_pairs[AddAccelPrivateCacheParam] != "--add-accel-private-cache" and \
+            type_val_pairs[AddAccelSharedCacheParam] != "--add-accel-shared-cache" and \
+                self.curr_sweep_value() != self._sweep_vals[0]:
+            # with no DMA and no accel private/shared cache (membus, fakemem) the mode has no effect: keep only the first sweep value
+            return False
+        if type_val_pairs[PftEnableParam] != "--pft-enable" and self.curr_sweep_value() == "pft":
+            # buffer mode "pft" is only valid when prefetch is enabled
+            return False
+        return True
+
+    @classmethod
+    def get(self, point_dir):
+        run_sh_path = os.path.join(point_dir, "run.sh")
+        assert os.path.exists(run_sh_path)
+        with open(run_sh_path, "r") as fp:
+            run_sh_lines = fp.readlines()
+
+        for line in run_sh_lines:
+            pos = line.find("--buffer-mode")
+            if pos == -1:
+                continue
+            return re.search(r"--buffer-mode\s+([a-zA-Z]+)", line).group(1)
+
+    @classmethod
+    def default_value(cls):
+        return ["all"]
+
+
 class EmbedSPMSizeParam(BaseParam):
     def __init__(self, name, sweep_vals):
         BaseParam.__init__(self, name, sweep_vals)
@@ -846,6 +936,11 @@ def apply(self, point_dir):
         change_config_file(
             point_dir, "run.sh", {"pft-enable": self.curr_sweep_value()})
 
+    def is_meaningful(self, type_val_pairs):
+        if type_val_pairs[UseFakeMemParam] == "--use-fake-mem" and self.curr_sweep_value() != self._sweep_vals[0]:
+            return False
+        return True
+
     @classmethod
     def get(self, point_dir):
         run_sh_path = os.path.join(point_dir, "run.sh")

diff --git a/bsc-util/nvdla_utilities/sweep/run.sh b/bsc-util/nvdla_utilities/sweep/run.sh
@@ -2,12 +2,16 @@
 %(gem5-binary)s \
 -d %(output-dir)s \
 %(config-dir)s --big-cpus 0 --little-cpus 1 --last-cache-level 2 --caches --accelerators \
+--little-cpu-clock %(little-cpu-clock)s \
+--freq-ratio %(freq-ratio)s \
 --numNVDLA %(numNVDLA)s \
 --maxReqNVDLA 1000 --enableTimingAXI \
 --restore-from %(cpt-dir)s \
 --bootscript=bootscript.rcS \
 --ddr-type %(ddr-type)s \
+--buffer-mode %(buffer-mode)s \
 %(dma-enable)s \
+%(shared-spm)s \
 --embed-spm-size %(embed-spm-size)s \
 --accel-embed-spm-lat %(accel-embed-spm-lat)s \
 %(add-accel-private-cache)s \
@@ -33,7 +37,6 @@
 %(pft-enable)s \
 --pft-threshold %(pft-threshold)s \
 %(use-fake-mem)s \
-%(shared-spm)s \
 %(cvsram-enable)s \
 --cvsram-size %(cvsram-size)s \
 --cvsram-bandwidth %(cvsram-bandwidth)s \

diff --git a/bsc-util/nvdla_utilities/sweep/sweeper.py b/bsc-util/nvdla_utilities/sweep/sweeper.py
@@ -13,8 +13,11 @@
 from params import *
 
 param_types = {
+    "little-cpu-clock": LittleCPUClockParam,
+    "freq-ratio": FreqRatioParam,
     "ddr-type": DDRTypeParam,
     "numNVDLA": NumNVDLAParam,
+    "buffer-mode": BufferModeParam,
     "dma-enable": DMAEnableParam,
     "shared-spm": SharedSPMParam,
     "embed-spm-size": EmbedSPMSizeParam,
@@ -51,6 +54,7 @@
 
 class Sweeper:
     def __init__(self, args):
+        self.home_path = os.popen("cd ~/ && pwd").readlines()[0].strip('\n')
         self.gem5_nvdla_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
         self.gen_points = args.gen_points
         self.cpt_dir = None
@@ -62,6 +66,7 @@ def __init__(self, args):
         os.makedirs(args.out_dir, exist_ok=True)
         # create subdirectory 'traces' in case CVSRAM Remapper changes trace.bin
 
+        self.disk_image = args.disk_image
         self.template_dir = os.path.dirname(os.path.abspath(__file__))
         self.gem5_binary = args.gem5_binary
         self.sim_dir = args.sim_dir
@@ -75,10 +80,6 @@ def __init__(self, args):
         self.mappers = {}
         self.mapper_comps = []  # [(mapper_path, [shell_cmd])]: each is a testcase that requires remapping computation
 
-        if not os.path.exists(os.path.join(self.gem5_nvdla_dir, "mnt/home")):
-            os.system("cd " + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) + " && "
-                      "sudo python3 util/gem5img.py mount " + args.disk_image + " ./mnt")
-
         for root, dirs, files in os.walk(args.jsons_dir):
             is_valid_dir = False
             for file in files:
@@ -208,12 +209,13 @@ def _create_point(self, json_id):
             assert False
 
         # cpt-dir should be changed after regenerating a checkpoint
-        change_config_file(point_dir, "run.sh", {"gem5-binary": self.gem5_binary})
+        change_config_file(point_dir, "run.sh", {"gem5-binary": self.gem5_binary.replace(self.home_path, "~")})
 
-        change_config_file(point_dir, "run.sh", {"output-dir": os.path.abspath(point_dir)})
+        change_config_file(point_dir, "run.sh", {"output-dir": os.path.abspath(point_dir).replace(self.home_path, "~")})
         change_config_file(point_dir, "bootscript.rcS", {"run-cmd": run_cmd})
         change_config_file(point_dir, "run.sh", {"config-dir":
-            os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../configs/example/arm/fs_bigLITTLE_RTL.py"))})
+            os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../configs/example/arm/fs_bigLITTLE_RTL.py"))
+                                                 .replace(self.home_path, "~")})
 
         # Apply every sweep parameter for this data point.
         for p in self.params_list[json_id][0]:
@@ -230,6 +232,10 @@ def parallel_remap_compute(self):
         pool.join()
 
     def resume_create_point(self):
+        if not os.path.exists(os.path.join(self.gem5_nvdla_dir, "mnt/home")):
+            os.system("cd " + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) +
+                      " && sudo python3 util/gem5img.py mount " + self.disk_image + " ./mnt")
+
         for dump_mapper_path, _ in self.mapper_comps:
             with open(dump_mapper_path, 'rb') as mapper_file:
                 mapper = pickle.load(mapper_file)
@@ -262,7 +268,8 @@ def enumerate(self, param_idx, json_id):
 
     def enumerate_all(self):
         """Create configurations for all data points.  """
-        print("Creating all data points...")
+        if self.gen_points:
+            print("Creating all data points...")
         for json_id in range(len(self.params_list)):
             self.enumerate(0, json_id)
 
@@ -283,6 +290,10 @@ def enumerate_all(self):
             for pt_dir in self.pt_dirs:
                 change_config_file(pt_dir, "run.sh", {"cpt-dir": self.cpt_dir})
 
+            esc_home_path = self.home_path.replace('/', '\\/')
+            esc_out_dir = self.out_dir.replace('/', '\\/')
+            os.system('sed -i "s/' + esc_home_path + '/~/g" `grep "' + esc_home_path + '" -rl ' + esc_out_dir + '`')
+
     def run_all(self, args):
         """Run simulations for all data points.
 

diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
@@ -164,8 +164,28 @@ def addPrivateAccelerator(self, system, clk_domain, membus, options):
 
             cpu.num_accels = options.numNVDLA
 
-            sft_pft_ctrl_str = "prefetch_enable=1, pft_threshold=options.pft_threshold"\
-                if options.pft_enable else "prefetch_enable=0"
+            if options.buffer_mode == "all":
+                pft_ctrl_str = "buffer_mode=0"
+            elif options.buffer_mode == "pft":
+                pft_ctrl_str = "buffer_mode=1"
+            else:
+                assert False
+
+            if options.pft_enable:
+                pft_ctrl_str += ", prefetch_enable=1, pft_threshold=options.pft_threshold"
+                if options.dma_enable:
+                    if not options.shared_spm:          # regular private SPM
+                        pft_ctrl_str += ", pft_buf_size=options.embed_spm_size"
+                    else:
+                        pft_ctrl_str += ", pft_buf_size=options.embed_spm_size / options.numNVDLA"
+                elif options.add_accel_private_cache:   # both private cache-only and mixed private & shared cache
+                    pft_ctrl_str += ", pft_buf_size=options.accel_pr_cache_size"
+                elif options.add_accel_shared_cache:
+                    pft_ctrl_str += ", pft_buf_size=options.accel_sh_cache_size"
+                else:                                   # membus
+                    pass
+            else:
+                pft_ctrl_str += ", prefetch_enable=0"
 
             # in the current phase, we only use one NVDLA accelerator, and spm cannot be used with caches
             if options.dma_enable:
@@ -175,10 +195,10 @@ def addPrivateAccelerator(self, system, clk_domain, membus, options):
             else:
                 dma_ctrl_str = "dma_enable=0"
 
-            fakemem_ctrl_str = "use_fake_mem=options.use_fake_mem"
+            fakemem_ctrl_str = "use_fake_mem=options.use_fake_mem, freq_ratio=options.freq_ratio"
 
             for i in range(4):
-                exec("cpu.accel_%d = rtlNVDLA(%s, %s, %s)" % (i, dma_ctrl_str, sft_pft_ctrl_str, fakemem_ctrl_str))
+                exec("cpu.accel_%d = rtlNVDLA(%s, %s, %s)" % (i, dma_ctrl_str, pft_ctrl_str, fakemem_ctrl_str))
 
             for i in range(4):
                 exec("cpu.accel_port_%d = cpu.accel_%d.cpu_side" % (i, i))

diff --git a/configs/example/arm/fs_bigLITTLE_RTL.py b/configs/example/arm/fs_bigLITTLE_RTL.py
@@ -224,15 +224,19 @@ def addOptions(parser):
 
     # options.numNVDLA
     parser.add_argument("--numNVDLA", type=int, default=1, help="number of NVDLAs")
+    # options.freq_ratio
+    parser.add_argument("--freq-ratio", type=int, default=1, help="=(frequency of LITTLE CPU) / (frequency of NVDLA)")
 
+    # options.buffer_mode
+    parser.add_argument("--buffer-mode", type=str, default="all", help="How to use pr/sh cache/embedded-SPM. all: cache all; pft: prefetch-buffer-only")
     # options.dma_enable
     parser.add_argument("--dma-enable", action="store_true", default=False, help="Use scratchpad embedded in NVDLA wrapper, aided with DMA")
+    # options.shared_spm
+    parser.add_argument("--shared-spm", action="store_true", default=False, help="change embedded SPM to shared")
     # options.embed_spm_size
     parser.add_argument("--embed-spm-size", type=str, default="64kB", help="specify private SPM size for accelerators (embedded SPM)")
     # options.accel_embed_spm_lat
     parser.add_argument("--accel-embed-spm-lat", type=int, default=12, help="specify private SPM latency for accelerators (embedded SPM)")
-    # options
-    parser.add_argument("--shared-spm", action="store_true", default=False, help="change embedded SPM to shared")
 
 
     # options.cvsram_enable