Need Support for DynamicQuantizeLinear(ONNX 11) #94

blogdefotsec · 2024-12-09T02:34:59Z

You can quantize a YOLO model to get the test model like this:

from ultralytics import YOLO
import os
import numpy as np
import time
from PIL import Image
import onnxruntime
from onnxruntime import quantization
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantFormat, QuantType

def preprocess(input_model_path):
    """Export a YOLO checkpoint to ONNX and run quantization pre-processing on it.

    Args:
        input_model_path: Path to the YOLO weights file handed to ``YOLO``.

    Side effects:
        Writes the exported ``.onnx`` model and ``pre_processed.onnx``
        (the latter relative to the current working directory).
    """
    model = YOLO(input_model_path)
    # Use the path reported by the exporter rather than rebuilding it from the
    # basename: the basename alone drops the directory, so a checkpoint such as
    # "weights/yolov8n.pt" would be looked up in the wrong place.
    exported_model_path = model.export(format="onnx")
    if not exported_model_path:
        # Fallback: assumes the exporter writes next to the weights file -- TODO confirm.
        exported_model_path = os.path.splitext(input_model_path)[0] + '.onnx'
    quantization.quant_pre_process(
        input_model=str(exported_model_path),
        output_model_path="pre_processed.onnx")

def dynamic_quant(model_input_path, Quant_Size = 'int8'):
    """Dynamically quantize an ONNX model and write it to ``output_dynamic.onnx``.

    Args:
        model_input_path: Path of the ONNX model to quantize.
        Quant_Size: ``'int8'`` selects signed 8-bit weights; any other value
            selects unsigned 8-bit weights.
    """
    # Only the exact string 'int8' switches to the signed type.
    weight_type = QuantType.QInt8 if Quant_Size == 'int8' else QuantType.QUInt8
    quantization.quantize_dynamic(
        model_input=model_input_path,
        model_output="output_dynamic.onnx",
        weight_type=weight_type,
    )

class DataReader(CalibrationDataReader):
    """Calibration reader that loads a folder of images on the first ``get_next`` call."""

    def __init__(self, calibration_image_folder, augmented_model_path=None):
        """Remember where the images and the model live; nothing is loaded yet."""
        self.image_folder = calibration_image_folder
        self.augmented_model_path = augmented_model_path
        # True until the calibration images have been loaded once.
        self.preprocess_flag = True
        self.enum_data_dicts = []
        self.datasize = 0

    def _load_calibration_data(self):
        """Read the model's input shape, preprocess the images, and build the feed iterator."""
        session = onnxruntime.InferenceSession(self.augmented_model_path, None)
        # The last two entries of the first input's 4-element shape are used as height and width.
        _, _, height, width = session.get_inputs()[0].shape
        samples = preprocess_func(self.image_folder, height, width, size_limit=0)
        feed_name = session.get_inputs()[0].name
        self.datasize = len(samples)
        self.enum_data_dicts = iter([{feed_name: sample} for sample in samples])

    def get_next(self):
        """Return the next ``{input_name: array}`` feed, or ``None`` when exhausted."""
        if self.preprocess_flag:
            # Cleared before loading, so a failed load is not retried on the next call.
            self.preprocess_flag = False
            self._load_calibration_data()
        return next(self.enum_data_dicts, None)
 
 
def preprocess_func(images_folder, height, width, size_limit=0):
    """Load the images in a folder into a float32 calibration batch.

    Each image is resized to ``(width, height)``, pasted onto an RGB canvas,
    scaled to ``[0, 1]`` and reordered from HWC to CHW with a leading axis of
    size 1.

    Args:
        images_folder: Directory containing the calibration images.
        height: Target image height in pixels.
        width: Target image width in pixels.
        size_limit: When greater than 0, use at most this many images.

    Returns:
        A float32 array of shape ``(N, 1, 3, height, width)``; an empty array
        of shape ``(0,)`` when the folder contains no files.
    """
    # os.listdir order is arbitrary, so sort to make the selected subset
    # reproducible; skip sub-directories, which cannot be opened as images.
    image_names = sorted(
        name for name in os.listdir(images_folder)
        if os.path.isfile(os.path.join(images_folder, name))
    )
    if size_limit > 0:
        image_names = image_names[:size_limit]

    samples = []
    for image_name in image_names:
        image_filepath = os.path.join(images_folder, image_name)
        canvas = Image.new("RGB", (width, height))
        # Context manager closes the file handle once the pixels are copied.
        with Image.open(image_filepath) as source_img:
            canvas.paste(source_img.resize((width, height)))
        # NOTE: the former `pillow_img.resize((640,640), 0)` call discarded its
        # result and had no effect, so it has been removed.
        chw = (np.float32(canvas) / 255.).transpose(2, 0, 1)
        samples.append(chw[np.newaxis, ...])
    return np.array(samples, dtype=np.float32)
 
def benchmark(model_path):
    """
    Time inference of an ONNX model on an all-zero 1x3x640x640 float32 input.

    Runs one untimed warm-up pass, then 10 timed passes, printing each latency
    and finally the average, all in milliseconds.
    """
    session = onnxruntime.InferenceSession(model_path)
    feed_name = session.get_inputs()[0].name
    dummy_input = np.zeros((1,3,640,640), np.float32)  # fake tensor

    runs = 10
    total_ms = 0.0
    # Warm-up pass, excluded from the timings.
    session.run([], {feed_name: dummy_input})
    for _ in range(runs):
        started = time.perf_counter()
        session.run([], {feed_name: dummy_input})
        elapsed_ms = (time.perf_counter() - started) * 1000
        total_ms += elapsed_ms
        print(f"{elapsed_ms:.2f}ms")
    average_ms = total_ms / runs
    print(f"Avg: {average_ms:.2f}ms")
 
 
def static_quant(input_model_path, output_model_path, calibration_dataset_path, Quant_Size='uint8', benchmark=False,):
    """Statically quantize an ONNX model in QDQ format using calibration images.

    Args:
        input_model_path: Path of the float ONNX model.
        output_model_path: Where the quantized model is written.
        calibration_dataset_path: Folder of images used for calibration.
        Quant_Size: ``'int8'`` selects signed 8-bit activations and weights;
            any other value selects unsigned 8-bit.
        benchmark: When truthy, time both the float and the quantized model
            after quantization.
    """
    dr = DataReader(calibration_dataset_path, input_model_path)
    quant_type = QuantType.QInt8 if Quant_Size == 'int8' else QuantType.QUInt8

    quantize_static(input_model_path,
                    output_model_path,
                    dr,
                    quant_format=QuantFormat.QDQ,
                    activation_type=quant_type,
                    weight_type=quant_type)
    print("Quant Over")
    if benchmark:
        # The `benchmark` parameter shadows the module-level benchmark()
        # function, so calling `benchmark(...)` here would call the flag itself
        # (TypeError for a bool). Fetch the function from module globals.
        run_benchmark = globals()["benchmark"]
        print("float32Test")
        run_benchmark(input_model_path)
        print("int8Test")
        run_benchmark(output_model_path)

def check_input(mode,yolo_model_path, Quant_Size, calibration_dataset_path=None, benchmark=False):
    """Validate the arguments of a quantization run.

    Args:
        mode: 0 for dynamic quantization, 1 for static quantization.
        yolo_model_path: Path to the YOLO weights; must end in ``.pt``.
        Quant_Size: ``'int8'`` or ``'uint8'``.
        calibration_dataset_path: Calibration image folder; required when
            ``mode`` is 1, and must exist whenever it is given.
        benchmark: Accepted for signature compatibility; not validated.

    Raises:
        ValueError: If any argument is invalid.
    """
    # Membership test also rejects negative, fractional and None modes, which
    # the former `mode > 1` comparison let through (or crashed on with TypeError).
    if mode not in (0, 1):
        raise ValueError("mode must be 0: dynamic quant, 1: static quant")
    if os.path.splitext(os.path.basename(yolo_model_path))[1] != '.pt':
        raise ValueError("yolo_model_path must be a .pt file")
    if Quant_Size not in ['int8', 'uint8']:
        raise ValueError("Quant_Size must be int8 or uint8")
    if calibration_dataset_path is None:
        # Static quantization cannot calibrate without images.
        if mode == 1:
            raise ValueError("calibration_dataset_path is required for static quant")
    elif not os.path.exists(calibration_dataset_path):
        raise ValueError("calibration_dataset_path must be a valid path")


def quant_yolo(mode, yolo_model_path, quant_size, calibration_dataset_path=None, benchmark_flag=False):
    """Export a YOLO model to ONNX and quantize it dynamically or statically.

    Args:
        mode: 0 for dynamic quantization, 1 for static quantization. Passing
            ``None`` prints the usage text and returns without doing any work.
        yolo_model_path: Path to the YOLO ``.pt`` weights.
        quant_size: ``'int8'`` or ``'uint8'``.
        calibration_dataset_path: Calibration image folder (static mode).
        benchmark_flag: When true, time the float and the quantized models.
    """
    if mode is None:
        print("Args:")
        print("mode: 0 for dynamic quant, 1 for static quant")
        print("yolo_model_path: path to yolo model")
        print("Quant_Size: int8 or uint8")
        print("calibration_dataset_path: path to calibration dataset, not required for dynamic quant")
        print("benchmark: True or False for checking the speed between quantilized and unquantilized, default False")
        # Nothing can be quantized without a mode; stop after showing the help.
        return
    # Pass the caller's flag, not the module-level benchmark() function.
    check_input(mode, yolo_model_path, quant_size, calibration_dataset_path, benchmark_flag)
    preprocess(yolo_model_path)
    if mode == 0:
        dynamic_quant('pre_processed.onnx', quant_size)
        quantized_model_path = 'output_dynamic.onnx'
    elif mode == 1:
        # Benchmarking is done below for both modes so that it honors
        # benchmark_flag; static_quant is therefore asked not to benchmark.
        static_quant('pre_processed.onnx', 'output_static.onnx', calibration_dataset_path, Quant_Size=quant_size, benchmark=False)
        quantized_model_path = 'output_static.onnx'
    else:
        return
    if benchmark_flag:
        print('float32Test')
        benchmark('pre_processed.onnx')
        print('int8Test')
        benchmark(quantized_model_path)

if __name__ == '__main__':
    # Running the file directly only prints the usage text below.
    usage_lines = (
        "Usage:scriptname.quant_yolo(mode, yolo_model_path, quant_size, calibration_dataset_path, benchmark)",
        "Args:",
        "mode: 0 for dynamic quant, 1 for static quant",
        "yolo_model_path: path to yolo model",
        "Quant_Size: int8 or uint8",
        "calibration_dataset_path: path to calibration dataset, not required for dynamic quant",
        "benchmark: True or False for checking the speed between quantilized and unquantilized, default False",
    )
    for usage_line in usage_lines:
        print(usage_line)
    # Example invocations, left disabled:
    # print("Test for Mode 0")
    # mode = 0
    # yolo_model_path = './yolov8n.pt'
    # quant_size = 'uint8'
    # calibration_dataset_path = 'path/to/your/images/to/calib'
    # benchmark_flag = True
    # quant_yolo(mode=mode, yolo_model_path=yolo_model_path, quant_size=quant_size, benchmark_flag=benchmark_flag)
    # print("Test for Mode 1")
    # mode = 1
    # quant_yolo(mode=mode, yolo_model_path=yolo_model_path, quant_size=quant_size, calibration_dataset_path=calibration_dataset_path, benchmark_flag=benchmark_flag)

The resulting warning and error look like this:
UserWarning: node DynamicQuantizeLinear is not registed for profiling, return 0 Macs and 0 params as default. Use NODEPROFILER_REGISTRY to register your profiler for this node.
warnings.warn(f'node {n.op_type} is not registed for profiling, return 0 Macs and 0 params as default. '
NotImplementedError: this Node DynamicQuantizeLinear-images_QuantizeLinear has no value_infer

The text was updated successfully, but these errors were encountered:

blogdefotsec · 2024-12-09T02:39:15Z

Also, I'd like to ask whether there is a guide for registering my own node :)

ThanatosShinji · 2024-12-10T10:48:42Z

@blogdefotsec Hi, here is some code that shows how to register a custom layer: code

ThanatosShinji · 2024-12-10T10:50:25Z

I believe you can register a similar layer, modeled on the existing QuantizeLinear one.

blogdefotsec · 2024-12-13T07:09:12Z

Thanks

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Need Support for DynamicQuantizeLinear(ONNX 11) #94

Need Support for DynamicQuantizeLinear(ONNX 11) #94

blogdefotsec commented Dec 9, 2024 •

edited

Loading

blogdefotsec commented Dec 9, 2024

ThanatosShinji commented Dec 10, 2024

ThanatosShinji commented Dec 10, 2024

blogdefotsec commented Dec 13, 2024

Need Support for DynamicQuantizeLinear(ONNX 11) #94

Need Support for DynamicQuantizeLinear(ONNX 11) #94

Comments

blogdefotsec commented Dec 9, 2024 • edited Loading

blogdefotsec commented Dec 9, 2024

ThanatosShinji commented Dec 10, 2024

ThanatosShinji commented Dec 10, 2024

blogdefotsec commented Dec 13, 2024

blogdefotsec commented Dec 9, 2024 •

edited

Loading