Skip to content

Commit

Permalink
fix a bug in dumping real data; use python3.6 in command line to avoi…
Browse files Browse the repository at this point in the history
…d calling incompatible python3.5 in advp docker image; fix bugs in testcase_int in remap.py
  • Loading branch information
suchandler96 committed Feb 22, 2024
1 parent 622c120 commit 60660d9
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 20 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ This section provides the process to generate the files in `bsc-util/nvdla_utili
```
$ docker run -it --rm -v ~/:/home edwinlai99/advp:v1
(advp)# cd /home/gem5-nvdla/bsc-util/nvdla_utilities/
(advp)# python3 caffe2trace.py --model-name lenet --caffemodel example_usage/caffe_models/lenet/lenet_iter_10000.caffemodel --prototxt example_usage/caffe_models/lenet/Lenet.prototxt --out-dir /home/nvdla/traces/lenet/
(advp)# python3.6 caffe2trace.py --model-name lenet --caffemodel example_usage/caffe_models/lenet/lenet_iter_10000.caffemodel --prototxt example_usage/caffe_models/lenet/Lenet.prototxt --out-dir /home/nvdla/traces/lenet/
```
Then the log files and `*.txn` register trace will appear in `/home/nvdla/traces/lenet/`.

# Compile a Pipelined Multibatch NN
Our repo provides a scheduler that can map multiple batches of NN inference tasks of a single model onto multiple simulated NVDLAs. This script, `pipeline_compile.py`, thus helps to compile multiple prototxt files at the same time. This script expects the user to manually split a Caffe NN into multiple `*.prototxt` files (the `*.caffemodel` does not need to be modified), each of which corresponds to one pipeline stage. These `.prototxt` files should be provided to the script in the order of the pipeline stages. Users are expected to use a subclass of `PipelineRemapper` in `match_reg_trace_addr/remap.py` when doing parameter sweeps for pipelined workloads. See below for usage:
```
$ docker run -it --rm -v ~/:/home edwinlai99/advp
$ docker run -it --rm -v ~/:/home edwinlai99/advp:v1
(advp)# cd /home/gem5-nvdla/bsc-util/nvdla_utilities/
(advp)# python3 pipeline_compile.py --model-name lenet --caffemodel example_usage/caffe_models/lenet/lenet_iter_10000.caffemodel --prototxts /home/gem5-nvdla/bsc-util/nvdla_utilities/example_usage/traces/lenet_pipeline/stage_1/lenet_stage1.prototxt /home/gem5-nvdla/bsc-util/nvdla_utilities/example_usage/traces/lenet_pipeline/stage_2/lenet_stage2.prototxt --out-dir /home/nvdla/traces/lenet_pipeline/
(advp)# python3.6 pipeline_compile.py --model-name lenet --caffemodel example_usage/caffe_models/lenet/lenet_iter_10000.caffemodel --prototxts /home/gem5-nvdla/bsc-util/nvdla_utilities/example_usage/traces/lenet_pipeline/stage_1/lenet_stage1.prototxt /home/gem5-nvdla/bsc-util/nvdla_utilities/example_usage/traces/lenet_pipeline/stage_2/lenet_stage2.prototxt --out-dir /home/nvdla/traces/lenet_pipeline/
```

## Developer Tips
Expand Down
2 changes: 1 addition & 1 deletion bsc-util/nvdla_utilities/caffe2trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def process_log(options):
nvdla_utilities_dir = os.path.dirname(os.path.abspath(__file__))
workload = Workload(options.out_dir, True, options.true_data, options.dump_results)
parse_mixed_type_trace(os.path.join(options.out_dir, "VP_mem_rd_wr"))
os.system("cd " + nvdla_utilities_dir + " && python3 fix_txn_discontinuous.py --vp-out-dir " + options.out_dir +
os.system("cd " + nvdla_utilities_dir + " && python3.6 fix_txn_discontinuous.py --vp-out-dir " + options.out_dir +
" --name try_input")
os.system("cd " + options.out_dir + " && mv input.txn bkp_input.txn")
os.system("cd " + options.out_dir + " && mv try_input.txn input.txn")
Expand Down
22 changes: 16 additions & 6 deletions bsc-util/nvdla_utilities/match_reg_trace_addr/parse_qemu_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,16 @@ def __init__(self, in_dir, to_convert=False, use_real_data=False, dump_results=F
assert info_match.group(3).count(' ') == data_len // 4 - 1
uint32_ts = info_match.group(3).replace("X", "0").replace("_", "").split()
contents = [int(uint32_t, 16) for uint32_t in uint32_ts]
if addr in memory and memory[addr] != contents:

'''
is_write_match = re.search("iswrite=([0-9]+)", line)
assert is_write_match is not None
is_write = int(is_write_match.group(1))
# only intermediate activations will be overwritten. We don't need their values in input.txn
if is_write == 0 and (addr in memory and memory[addr] != contents):
print("inconsistent memory access result!\nPrevious:\n", memory[addr], "\nNow:\n", contents)
'''
memory[addr] = contents

to_get_data = self.weights + self.inputs + self.outputs if self.dump_results else self.weights + self.inputs
Expand All @@ -243,11 +251,11 @@ def __init__(self, in_dir, to_convert=False, use_real_data=False, dump_results=F
for addr in range(aligned_start, aligned_end_ceil, self.axi_width):
# each entry has length self.axi_width
contents = memory[addr]
this_txn_st = max(data_blk.addr, addr) % self.axi_width
this_txn_ed = min(addr + self.axi_width, data_blk.addr + data_blk.size) % self.axi_width
this_txn_st = max(data_blk.addr, addr)
this_txn_ed = min(addr + self.axi_width, data_blk.addr + data_blk.size)
for byte_id in range(this_txn_st, this_txn_ed):
int_id = byte_id // 4
offset = (byte_id % 4) * 8
int_id = (byte_id % self.axi_width) // 4
offset = ((byte_id % self.axi_width) % 4) * 8
byte = (contents[int_id] >> offset) & 0xff
file_line += "0x%02x " % byte
if bytes_in_this_line == 31:
Expand Down Expand Up @@ -307,7 +315,7 @@ def read_compile_log(self):
with open(os.path.join(self.in_dir, "compile_log")) as fp:
lines = fp.readlines()

desc2uid = {} # {(addr_id, offset, size): (tsd_str, tb_str)}, to examine the mapping is a bijection
desc2uid = {} # {(addr_id, offset): (tsd_str, tb_str)}, to examine the mapping is a bijection
uid2desc = {}
# first build the above two mappings
for line_id, line in enumerate(lines):
Expand All @@ -325,6 +333,7 @@ def read_compile_log(self):
assert data_blk.uid is None # assume each tensor is reported once
data_blk.uid = uid

'''
# then get ALLOC and DEALLOC time info
for line_id, line in enumerate(lines):
if "[MEMTOOL]" in line:
Expand Down Expand Up @@ -352,6 +361,7 @@ def read_compile_log(self):
self.data[desc].liveness[0] = time_stamp
else:
assert False
'''

def sclog2traces(self):
txn_lines = []
Expand Down
40 changes: 31 additions & 9 deletions bsc-util/nvdla_utilities/match_reg_trace_addr/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import argparse
import re
import errno
import matplotlib.pyplot as plt
import matplotlib.patches as patches

Expand All @@ -13,7 +14,7 @@
class BaseRemapper:
def __init__(self, in_dir, model_name):
""" paths """
self.in_dir = in_dir
self.in_dir = in_dir # expect in_dir to be VP out dir
self.model_name = model_name

""" workload-related info """
Expand Down Expand Up @@ -75,11 +76,12 @@ def write_to_files(self):
os.path.join(self.out_dir, "trace.bin"))
rd_var_log_path = os.path.join(self.out_dir, "rd_only_var_log")
""" generate rd_only_var_log """
with open(rd_var_log_path, "wb") as fp:
for rd_only_var in self.workload.rd_only_vars:
data_blk = self.workload.data[rd_only_var]
fp.write(data_blk.addr.to_bytes(4, byteorder="little", signed=False))
fp.write(data_blk.size.to_bytes(4, byteorder="little", signed=False))
if not os.path.exists(rd_var_log_path):
with open(rd_var_log_path, "wb") as fp:
for rd_only_var in self.workload.rd_only_vars:
data_blk = self.workload.data[rd_only_var]
fp.write(data_blk.addr.to_bytes(4, byteorder="little", signed=False))
fp.write(data_blk.size.to_bytes(4, byteorder="little", signed=False))


class CVSRAMRemapper(BaseRemapper):
Expand Down Expand Up @@ -122,6 +124,8 @@ def change_ram_type_to_cvsram(self, raw_lines, modified_lines, line_id, modify_s
line = raw_lines[line_id]
# use regex to get the register
reg_match = re.search(r'#\s?(0x[0-9a-f]{1,2}0[0-9a-f]{2})', line)
if reg_match is None: # not register txns. May be {load|dump}_mem
return
reg = int(reg_match.group(1), 16)
ram_type_reg, ram_type_bit = self.assoc_reg_bits[reg]

Expand Down Expand Up @@ -176,6 +180,22 @@ def testcase_init(self, out_dir, sim_dir, testcase_str):
BaseRemapper.testcase_init(self, out_dir, sim_dir, testcase_str)
self.mapping.clear()

for root, dirs, files in os.walk(self.in_dir):
if os.path.abspath(root) == os.path.abspath(self.in_dir):
# create symbolic links of *.dat files under root in out_dir
for file in files:
if file.endswith(".dat"):
src = os.path.abspath(os.path.join(root, file))
link = os.path.abspath(os.path.join(out_dir, file))
try:
os.symlink(src, link)
except OSError as e:
if e.errno == errno.EEXIST:
os.remove(link)
os.symlink(src, link)
else:
raise e

def set_cvsram_param(self, num_cvsram, cvsram_base_addrs, cvsram_sizes):
CVSRAMRemapper.set_cvsram_param(self, num_cvsram, cvsram_base_addrs, cvsram_sizes)
assert num_cvsram == 1
Expand All @@ -190,7 +210,9 @@ def write_to_files(self):
for orig_addr, mapped_addr in self.mapping.items():
for line_id, line in enumerate(raw_txn_lines):
if hex(orig_addr) in line and not modify_status[line_id]:
new_lines[line_id] = line.replace(hex(orig_addr), hex(mapped_addr))
new_lines[line_id] = line.replace(hex(orig_addr), hex(mapped_addr), 1)
# the parameter "1" is crucial since in {load|dump}_mem, files are named with addresses
# we want to keep the file name unchanged after remapping
modify_status[line_id] = True

self.change_ram_type_to_cvsram(raw_txn_lines, new_lines, line_id, modify_status)
Expand Down Expand Up @@ -528,7 +550,7 @@ def __init__(self, in_dir, model_name):
CVSRAMRemapper.__init__(self, in_dir, model_name)

def testcase_init(self, out_dir, sim_dir, testcase_str):
BaseRemapper.testcase_init(self, out_dir, sim_dir, testcase_str)
PipelineRemapper.testcase_init(self, out_dir, sim_dir, testcase_str)

def remap_weights(self):
next_avail_aligned = self.weight_base_addr # next available addr in DRAM
Expand Down Expand Up @@ -563,7 +585,7 @@ def __init__(self, in_dir, model_name):
CVSRAMRemapper.__init__(self, in_dir, model_name)

def testcase_init(self, out_dir, sim_dir, testcase_str):
BaseRemapper.testcase_init(self, out_dir, sim_dir, testcase_str)
PipelineRemapper.testcase_init(self, out_dir, sim_dir, testcase_str)

def remap_activations(self):
PipelineRemapper.remap_activations(self)
Expand Down
2 changes: 1 addition & 1 deletion bsc-util/nvdla_utilities/pipeline_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def main():
assert os.path.exists(options.prototxts[i])
os.system("cp " + options.prototxts[i] + " " + work_dir)
os.system("cd " + os.path.dirname(os.path.abspath(__file__)) +
" && python3 caffe2trace.py --model-name " + options.model_name +
" && python3.6 caffe2trace.py --model-name " + options.model_name +
"_stage_" + str(i + 1) + " --caffemodel " + os.path.abspath(options.caffemodel) + " --prototxt " +
os.path.abspath(options.prototxts[i]) +
" --nvdla-compiler " + os.path.abspath(options.nvdla_compiler) + " --qemu-bin " +
Expand Down

0 comments on commit 60660d9

Please sign in to comment.