Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Need Support for DynamicQuantizeLinear(ONNX 11) #94

Open
blogdefotsec opened this issue Dec 9, 2024 · 4 comments
Open

Need Support for DynamicQuantizeLinear(ONNX 11) #94

blogdefotsec opened this issue Dec 9, 2024 · 4 comments

Comments

@blogdefotsec
Copy link

blogdefotsec commented Dec 9, 2024

You can quantize a YOLO model to get the test model like this:

from ultralytics import YOLO
import os
import numpy as np
import time
from PIL import Image
import onnxruntime
from onnxruntime import quantization
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantFormat, QuantType

def preprocess(input_model_path):
    """Export a YOLO checkpoint to ONNX and pre-process it for quantization.

    Args:
        input_model_path: Path to the YOLO model file handed to ``YOLO(...)``.

    The pre-processed model is always written to ``pre_processed.onnx``.
    """
    YOLO(input_model_path).export(format="onnx")
    # The exported file is looked up by bare file name (directory stripped).
    # NOTE(review): this presumes the exporter's output is reachable as
    # "<stem>.onnx" from the current working directory -- confirm.
    stem, _ = os.path.splitext(os.path.basename(input_model_path))
    exported_onnx = stem + '.onnx'
    quantization.quant_pre_process(
        input_model=exported_onnx,
        output_model_path="pre_processed.onnx",
    )

def dynamic_quant(model_input_path, Quant_Size = 'int8'):
    """Dynamically quantize an ONNX model and write ``output_dynamic.onnx``.

    Args:
        model_input_path: Path of the ONNX model to quantize.
        Quant_Size: ``'int8'`` selects ``QuantType.QInt8``; every other value
            (including ``'uint8'``) selects ``QuantType.QUInt8``.
    """
    weight_type = QuantType.QInt8 if Quant_Size == 'int8' else QuantType.QUInt8
    quantization.quantize_dynamic(
        model_input=model_input_path,
        model_output="output_dynamic.onnx",
        weight_type=weight_type,
    )

class DataReader(CalibrationDataReader):
    """Calibration data reader that feeds images from a folder to the quantizer.

    The images are loaded lazily: nothing is read until the first call to
    ``get_next``.
    """

    def __init__(self, calibration_image_folder, augmented_model_path=None):
        """Remember where the images and the model live; load nothing yet.

        Args:
            calibration_image_folder: Folder passed to ``preprocess_func``.
            augmented_model_path: Model path used to open an inference
                session on the first ``get_next`` call.
        """
        self.image_folder = calibration_image_folder
        self.augmented_model_path = augmented_model_path
        # True until the calibration samples have been loaded once.
        self.preprocess_flag = True
        # Replaced by an iterator of feed dicts on the first get_next() call.
        self.enum_data_dicts = []
        # Number of loaded samples; stays 0 until the first get_next() call.
        self.datasize = 0

    def get_next(self):
        """Return the next ``{input_name: sample}`` dict, or ``None`` when exhausted."""
        if self.preprocess_flag:
            self.preprocess_flag = False
            session = onnxruntime.InferenceSession(self.augmented_model_path, None)
            model_input = session.get_inputs()[0]
            # Only the last two dimensions of the first input's shape are used.
            _, _, height, width = model_input.shape
            samples = preprocess_func(self.image_folder, height, width, size_limit=0)
            input_name = model_input.name
            self.datasize = len(samples)
            feeds = [{input_name: sample} for sample in samples]
            self.enum_data_dicts = iter(feeds)
        return next(self.enum_data_dicts, None)
 
 
def preprocess_func(images_folder, height, width, size_limit=0):
    """Load the images in a folder into a float32 batch for calibration.

    Each image is resized to ``(width, height)``, pasted onto an RGB canvas
    (which converts it to RGB), scaled to ``[0, 1]`` and rearranged from
    ``(H, W, C)`` to ``(1, C, H, W)``.

    Args:
        images_folder: Folder whose entries are all opened as images.
        height: Target image height in pixels.
        width: Target image width in pixels.
        size_limit: When positive and the folder holds at least that many
            entries, only the first ``size_limit`` entries are used.

    Returns:
        A float32 array of shape ``(N, 1, 3, height, width)``; iterating over
        it yields one ``(1, 3, height, width)`` sample per image. An empty
        folder yields an empty array.
    """
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        image_names = image_names[:size_limit]

    samples = []
    for image_name in image_names:
        image_filepath = os.path.join(images_folder, image_name)
        canvas = Image.new("RGB", (width, height))
        # Open inside a context manager so the file handle is released
        # promptly instead of leaking one descriptor per image.
        with Image.open(image_filepath) as source_img:
            canvas.paste(source_img.resize((width, height)))
        # The former `canvas.resize((640, 640), 0)` call was dropped: its
        # result was discarded, so it never affected the output.
        hwc = np.asarray(canvas, dtype=np.float32) / 255.
        chw = hwc[np.newaxis, :, :].transpose(0, 3, 1, 2)
        samples.append(np.array(chw, dtype=np.float32))

    # Stack along a new leading axis; an empty list gives an empty array.
    return np.asarray(samples, dtype=np.float32)
 
def benchmark(model_path):
    """
    Time repeated inference runs of an ONNX model and print the latencies.

    One untimed warm-up run is followed by 10 timed runs on an all-zero
    float32 tensor of shape (1, 3, 640, 640). Each run's latency and the
    average are printed in milliseconds.
    """
    session = onnxruntime.InferenceSession(model_path)
    first_input_name = session.get_inputs()[0].name
    dummy_input = np.zeros((1,3,640,640), np.float32)  # fake tensor
    feed = {first_input_name: dummy_input}

    runs = 10
    total_ms = 0.0
    # Warm-up run, excluded from the timings.
    session.run([], feed)
    for _ in range(runs):
        started = time.perf_counter()
        session.run([], feed)
        elapsed_ms = (time.perf_counter() - started) * 1000
        total_ms += elapsed_ms
        print(f"{elapsed_ms:.2f}ms")
    average_ms = total_ms / runs
    print(f"Avg: {average_ms:.2f}ms")
 
 
def static_quant(input_model_path, output_model_path, calibration_dataset_path, Quant_Size='uint8', benchmark=False):
    """Statically quantize an ONNX model in QDQ format using calibration images.

    Args:
        input_model_path: Path of the ONNX model to quantize; also used by
            the calibration data reader to open an inference session.
        output_model_path: Where the quantized model is written.
        calibration_dataset_path: Folder of calibration images.
        Quant_Size: ``'int8'`` selects ``QuantType.QInt8`` for activations
            and weights; every other value selects ``QuantType.QUInt8``.
        benchmark: When truthy, time the original and the quantized model
            after quantization.
    """
    reader = DataReader(calibration_dataset_path, input_model_path)
    quant_type = QuantType.QInt8 if Quant_Size == 'int8' else QuantType.QUInt8

    quantize_static(input_model_path,
                    output_model_path,
                    reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=quant_type,
                    weight_type=quant_type)
    print("Quant Over")
    if benchmark:
        # The `benchmark` parameter shadows the module-level benchmark()
        # function, so calling `benchmark(...)` here would call the flag
        # itself (TypeError for True). Fetch the function from the module
        # namespace explicitly; the parameter name is kept for callers.
        run_benchmark = globals()["benchmark"]
        print("float32Test")
        run_benchmark(input_model_path)
        print("int8Test")
        run_benchmark(output_model_path)

def check_input(mode,yolo_model_path, Quant_Size, calibration_dataset_path=None, benchmark=False):
    """Validate the arguments of a quantization run.

    Args:
        mode: Must be 0 (dynamic quant) or 1 (static quant).
        yolo_model_path: Must have the ``.pt`` extension.
        Quant_Size: Must be ``'int8'`` or ``'uint8'``.
        calibration_dataset_path: Must exist when given; required when
            ``mode`` is 1.
        benchmark: Accepted for signature compatibility; not validated.

    Raises:
        ValueError: If any argument fails the checks above.
    """
    # A membership test also rejects negative values and None, which the
    # former `mode > 1` comparison let through (or failed on with TypeError).
    if mode not in (0, 1):
        raise ValueError("mode must be 0: dynamic quant, 1: static quant")
    if os.path.splitext(os.path.basename(yolo_model_path))[1] != '.pt':
        raise ValueError("yolo_model_path must be a .pt file")
    if Quant_Size not in ('int8', 'uint8'):
        raise ValueError("Quant_Size must be int8 or uint8")
    if calibration_dataset_path is None:
        # Static quantization lists the calibration folder; os.listdir(None)
        # would silently list the current working directory instead.
        if mode == 1:
            raise ValueError("calibration_dataset_path is required for static quant")
    elif not os.path.exists(calibration_dataset_path):
        raise ValueError("calibration_dataset_path must be a valid path")


def quant_yolo(mode, yolo_model_path, quant_size, calibration_dataset_path=None, benchmark_flag=False):
    """Quantize a YOLO model dynamically (mode 0) or statically (mode 1).

    Args:
        mode: 0 for dynamic quant, 1 for static quant. ``None`` prints the
            usage text and returns without doing any work.
        yolo_model_path: Path to the YOLO ``.pt`` model.
        quant_size: ``'int8'`` or ``'uint8'``.
        calibration_dataset_path: Folder of calibration images; only used
            for static quant.
        benchmark_flag: When truthy, time the pre-processed float model and
            the quantized model after quantization.

    The pre-processed model is written to ``pre_processed.onnx`` and the
    quantized model to ``output_dynamic.onnx`` or ``output_static.onnx``.
    """
    if mode is None:
        print("Args:")
        print("mode: 0 for dynamic quant, 1 for static quant")
        print("yolo_model_path: path to yolo model")
        print("Quant_Size: int8 or uint8")
        print("calibration_dataset_path: path to calibration dataset, not required for dynamic quant")
        print("benchmark: True or False for checking the speed between quantilized and unquantilized, default False")
        # Nothing can be quantized without a mode; stop after the usage text
        # instead of falling through into validation.
        return
    # Pass the caller's flag, not the module-level benchmark() function.
    check_input(mode, yolo_model_path, quant_size, calibration_dataset_path, benchmark_flag)
    preprocess(yolo_model_path)
    if mode == 0:
        dynamic_quant('pre_processed.onnx', quant_size)
        quantized_model = 'output_dynamic.onnx'
    elif mode == 1:
        # Benchmarking is done below for both modes so that benchmark_flag is
        # honoured; static_quant is therefore told not to benchmark itself.
        static_quant('pre_processed.onnx', 'output_static.onnx', calibration_dataset_path, Quant_Size=quant_size, benchmark=False)
        quantized_model = 'output_static.onnx'
    else:
        return
    if benchmark_flag:
        print('float32Test')
        benchmark('pre_processed.onnx')
        print('int8Test')
        benchmark(quantized_model)

if __name__ == '__main__':
    # Running the file directly only prints the usage help below; nothing is
    # quantized unless one of the commented examples is enabled.
    usage_lines = (
        "Usage:scriptname.quant_yolo(mode, yolo_model_path, quant_size, calibration_dataset_path, benchmark)",
        "Args:",
        "mode: 0 for dynamic quant, 1 for static quant",
        "yolo_model_path: path to yolo model",
        "Quant_Size: int8 or uint8",
        "calibration_dataset_path: path to calibration dataset, not required for dynamic quant",
        "benchmark: True or False for checking the speed between quantilized and unquantilized, default False",
    )
    for usage_line in usage_lines:
        print(usage_line)
    # Example invocations (uncomment to run):
    # print("Test for Mode 0")
    # mode = 0
    # yolo_model_path = './yolov8n.pt'
    # quant_size = 'uint8'
    # calibration_dataset_path = 'path/to/your/images/to/calib'
    # benchmark_flag = True
    # quant_yolo(mode=mode, yolo_model_path=yolo_model_path, quant_size=quant_size, benchmark_flag=benchmark_flag)
    # print("Test for Mode 1")
    # mode = 1
    # quant_yolo(mode=mode, yolo_model_path=yolo_model_path, quant_size=quant_size, calibration_dataset_path=calibration_dataset_path, benchmark_flag=benchmark_flag)

The warning and error look like this:
UserWarning: node DynamicQuantizeLinear is not registed for profiling, return 0 Macs and 0 params as default. Use NODEPROFILER_REGISTRY to register your profiler for this node.
warnings.warn(f'node {n.op_type} is not registed for profiling, return 0 Macs and 0 params as default. '
NotImplementedError: this Node DynamicQuantizeLinear-images_QuantizeLinear has no value_infer

@blogdefotsec
Copy link
Author

Also, is there a guide for registering a custom node? :)

@ThanatosShinji
Copy link
Owner

@blogdefotsec Hi, here is some code that shows how to register a custom layer: code

@ThanatosShinji
Copy link
Owner

I believe you can register a similar layer, modeled on the existing QuantizeLinear one.

@blogdefotsec
Copy link
Author

Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants