[PTQ] Quantizer scales unification (openvinotoolkit#1633)
### Changes

- Added quantizer scales unification

### Reason for changes

- Performance improvements

### Related tickets

- 105910

### Tests

- Updated
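
For background on what the unification does: the activation quantizers in a unified scale group (typically the inputs of Concat-like operations) are calibrated against one shared min/max range instead of per-branch ranges, so they all receive identical FakeQuantize scales. A minimal sketch of the idea with made-up ranges and a simplified affine-scale formula, not NNCF code:

```python
bits = 8
levels = 2 ** bits - 1

# Calibration ranges of two branches feeding the same Concat (made-up values).
branch_ranges = [(-1.0, 0.8), (-0.4, 1.6)]

# Without unification, each branch gets its own scale...
per_branch_scales = [(high - low) / levels for low, high in branch_ranges]

# ...with unification, the group shares min-of-mins / max-of-maxes,
# hence one common scale for every quantizer in the group.
unified_low = min(low for low, _ in branch_ranges)
unified_high = max(high for _, high in branch_ranges)
unified_scale = (unified_high - unified_low) / levels

print(per_branch_scales)  # about [0.0071, 0.0078]: two different scales
print(unified_scale)      # about 0.0102: one shared scale
```

Sharing one scale lets the runtime avoid requantizing each input at such junctions, which is presumably where the performance gain noted above comes from.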
KodiaqQ authored Apr 5, 2023
1 parent da15540 commit 0f7ddfc
Showing 11 changed files with 1,656 additions and 1,541 deletions.
@@ -11,7 +11,8 @@
limitations under the License.
"""

from typing import Dict, List, Optional, Tuple
import numpy as np
from typing import Dict, List, Tuple, Optional

from nncf.parameters import ModelType
from nncf.parameters import TargetDevice
@@ -46,7 +47,7 @@
from nncf.experimental.openvino_native.quantization.default_quantization import DEFAULT_OV_QUANT_TRAIT_TO_OP_DICT
from nncf.experimental.openvino_native.statistics.collectors import OVMeanMinMaxStatisticCollector
from nncf.experimental.openvino_native.statistics.collectors import OVMinMaxStatisticCollector

from nncf.experimental.openvino_native.statistics.statistics import OVMinMaxTensorStatistic
from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend
from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS
from nncf.quantization.fake_quantize import FakeQuantizeParameters
@@ -97,6 +98,16 @@ def create_weight_quantizer_insertion_command(
parameters: FakeQuantizeParameters) -> OVQuantizerInsertionCommand:
return OVQuantizerInsertionCommand(target_point, parameters)

@staticmethod
def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTensorStatistic:
max_values, min_values = [], []
for statistic in statistics:
max_values.append(statistic.max_values)
min_values.append(statistic.min_values)
max_values = np.max(max_values, axis=0)
min_values = np.min(min_values, axis=0)
return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values)

@staticmethod
def _get_reduction_shape_and_use_abs_max(
nncf_graph: NNCFGraph,
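
To make the reduction above easy to check in isolation, here is a standalone sketch that mirrors unify_statistics with a stand-in class (FakeMinMaxStatistic is hypothetical; the real OVMinMaxTensorStatistic is not imported, so this only illustrates the elementwise min/max behaviour):

```python
from dataclasses import dataclass
from typing import List

import numpy as np


@dataclass
class FakeMinMaxStatistic:
    # Stand-in for OVMinMaxTensorStatistic: only the two fields used above.
    min_values: np.ndarray
    max_values: np.ndarray


def unify_statistics(statistics: List[FakeMinMaxStatistic]) -> FakeMinMaxStatistic:
    # Same reduction as the backend method: elementwise max of maxes and
    # min of mins across the group, so every member shares one range.
    max_values = np.max([s.max_values for s in statistics], axis=0)
    min_values = np.min([s.min_values for s in statistics], axis=0)
    return FakeMinMaxStatistic(min_values=min_values, max_values=max_values)


stats = [
    FakeMinMaxStatistic(np.array([-1.0, -0.2]), np.array([0.9, 1.4])),
    FakeMinMaxStatistic(np.array([-0.4, -0.8]), np.array([1.1, 0.6])),
]
unified = unify_statistics(stats)
print(unified.min_values, unified.max_values)  # [-1.  -0.8] [1.1 1.4]
```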
92 changes: 81 additions & 11 deletions nncf/quantization/algorithms/min_max/algorithm.py
@@ -12,7 +12,7 @@
"""

from copy import deepcopy
from typing import Dict, TypeVar, Optional, OrderedDict
from typing import Dict, TypeVar, Optional, OrderedDict, List
import collections

from nncf import Dataset
@@ -167,6 +167,7 @@ def __init__(self, parameters: MinMaxQuantizationParameters):
self._quantization_target_points_to_qconfig = \
collections.OrderedDict() # type: OrderedDict[TargetPoint, QuantizerConfig]
self._parameters = parameters
self._unified_scale_groups = []

@property
def available_backends(self) -> Dict[str, BackendType]:
@@ -247,6 +248,7 @@ def _get_quantizer_setup(self, nncf_graph: NNCFGraph, pattern: GraphPattern) ->
Returns SingleConfigQuantizerSetup instance based on the input NNCFGraph.
:param nncf_graph: NNCFGraph instance.
:param pattern: GraphPattern instance.
:return: SingleConfigQuantizerSetup for the current NNCFGraph entity.
"""
hw_config_type = get_hw_config_type(self._parameters.target_device.value)
@@ -325,6 +327,19 @@ def _add_activation_quantization_target_point(self,
:param nncf_graph: NNCFGraph instance for working with the graph and nodes.
:param quantization_point: SingleConfigQuantizationPoint for the needed layer.
"""
activation_quantization_target_point = self._get_activation_quantization_target_point(quantization_point)
self._quantization_target_points_to_qconfig[activation_quantization_target_point] = quantization_point.qconfig

def _get_activation_quantization_target_point(
self,
quantization_point: SingleConfigQuantizationPoint) -> SingleConfigQuantizationPoint:
"""
Returns the activation quantization target point for the given quantization point.
:param quantization_point: SingleConfigQuantizationPoint for the needed layer.
:return: SingleConfigQuantizationPoint for the needed layer.
"""
node_name = quantization_point.insertion_point.target_node_name
# If Quantization of node's input
if quantization_point.insertion_point.input_port_id is not None:
@@ -338,7 +353,7 @@ def _add_activation_quantization_target_point(self,
activation_quantization_target_point = self._backend_entity.target_point(TargetType.POST_LAYER_OPERATION,
node_name,
output_port_id)
self._quantization_target_points_to_qconfig[activation_quantization_target_point] = quantization_point.qconfig
return activation_quantization_target_point

def _get_quantization_target_points(self, model: TModel) -> OrderedDict[TargetPoint, QuantizerConfig]:
"""
@@ -349,17 +364,19 @@ def _get_quantization_target_points(self, model: TModel) -> OrderedDict[TargetPo
finds the quantization setup and processes it to the Set of Quantization Target Points.
:param model: Backend-specific model, for which Quantization Target Points are being seek.
:param nncf_graph: NNCFGraph instance.
:return: Mapping of quantization target points to quantizer configs and the unified scale groups.
"""
nncf_graph = NNCFGraphFactory.create(model) if self.nncf_graph is None else self.nncf_graph

if self._quantization_target_points_to_qconfig:
return self._quantization_target_points_to_qconfig
return self._quantization_target_points_to_qconfig, self._unified_scale_groups
backend = get_backend(model)
device = self._parameters.target_device
pattern = PatternsManager.get_full_pattern_graph(backend, device)
quantizer_setup = self._get_quantizer_setup(nncf_graph, pattern)
self._apply_model_type_pass(self._parameters.model_type, quantizer_setup, nncf_graph)
self._unified_scale_groups = self._collect_unified_groups(quantizer_setup)
for quantization_point in quantizer_setup.quantization_points.values():
if quantization_point.is_weight_quantization_point():
self._add_weight_quantization_target_point(quantization_point, nncf_graph)
@@ -369,24 +386,79 @@ def _get_quantization_target_points(self, model: TModel) -> OrderedDict[TargetPo
raise RuntimeError('Incorrect quantization point')
self._quantization_target_points_to_qconfig = collections.OrderedDict(
sorted(self._quantization_target_points_to_qconfig.items()))
return self._quantization_target_points_to_qconfig
return self._quantization_target_points_to_qconfig, self._unified_scale_groups

def _collect_unified_groups(self, quantizer_setup: SingleConfigQuantizerSetup) -> List[List[TargetPoint]]:
"""
Collects the groups of quantizers for unification.
:param quantizer_setup: SingleConfigQuantizerSetup instance.
:return: List with the groups of the TargetPoints.
"""
unified_scale_groups = []
for quantizer_ids in quantizer_setup.unified_scale_groups.values():
unified_scale_group = []
for quantizer_id in quantizer_ids:
quantization_point = quantizer_setup.quantization_points[quantizer_id]

# Only activation quantizers can be unified
if quantization_point.is_activation_quantization_point():
activation_target_point = self._get_activation_quantization_target_point(quantization_point)
unified_scale_group.append(activation_target_point)
else:
raise RuntimeError('Only activation quantizers can be unified.')
unified_scale_groups.append(unified_scale_group)
return unified_scale_groups

def _get_graph_pattern(self, model: TModel) -> GraphPattern:
"""
Returns full graph pattern for quantizer setup calculation.
:param model: Backend-specific model.
:return: GraphPattern instance.
"""
backend = get_backend(model)
device = self._parameters.target_device
return PatternsManager.get_full_pattern_graph(backend, device)

def _apply(self,
model: TModel,
statistic_points: Optional[StatisticPointsContainer] = None,
dataset: Optional[Dataset] = None) -> TModel:
transformation_layout, transformation_commands = TransformationLayout(), []
transformation_layout = TransformationLayout()
nncf_graph = NNCFGraphFactory.create(model) if self.nncf_graph is None else self.nncf_graph
model_transformer = ModelTransformerFactory.create(model)

quantization_target_points = self._get_quantization_target_points(model)
quantization_target_points, unified_scale_groups = self._get_quantization_target_points(model)
weight_layer_names = set()

def filter_func(point: StatisticPoint) -> bool:
return MinMaxQuantization in point.algorithm_to_tensor_collectors and \
point.target_point == quantization_target_point

unified_ops_list = set()
for unified_scale_group in unified_scale_groups:
group_statistics = []
for quantization_target_point in unified_scale_group:
target_node_name = quantization_target_point.target_node_name
for tensor_collector in statistic_points.get_algo_statistics_for_node(
target_node_name,
filter_func,
MinMaxQuantization):
group_statistics.append(tensor_collector.get_statistics())
unified_values = self._backend_entity.unify_statistics(group_statistics)
for quantization_target_point in unified_scale_group:
qconfig = quantization_target_points[quantization_target_point]
parameters = calculate_quantizer_parameters(unified_values, qconfig, QuantizerGroup.ACTIVATIONS)
command = self._backend_entity.create_activation_quantizer_insertion_command(
nncf_graph, quantization_target_point,
qconfig, parameters)
transformation_layout.register(command)
unified_ops_list.add(quantization_target_point)

for quantization_target_point, qconfig in quantization_target_points.items():
if quantization_target_point in unified_ops_list:
continue
target_node_name = quantization_target_point.target_node_name
for tensor_collector in statistic_points.get_algo_statistics_for_node(
target_node_name,
@@ -409,18 +481,16 @@ def filter_func(point: StatisticPoint) -> bool:
command = self._backend_entity.create_activation_quantizer_insertion_command(
nncf_graph, quantization_target_point, qconfig, parameters)

transformation_commands.append(command)

for transformation_command in transformation_commands:
transformation_layout.register(transformation_command)
transformation_layout.register(command)

quantized_model = model_transformer.transform(transformation_layout)
return quantized_model

def get_statistic_points(self, model: TModel) -> StatisticPointsContainer:
self._set_backend_entity(model)
quantization_target_points = self._get_quantization_target_points(model)
nncf_graph = NNCFGraphFactory.create(model) if self.nncf_graph is None else self.nncf_graph

quantization_target_points, _ = self._get_quantization_target_points(model)
output = StatisticPointsContainer()
for quantization_target_point, qconfig in quantization_target_points.items():
nncf_logger.debug(f'Adding target point {quantization_target_point.target_node_name}'
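
Restating the control flow that the _apply changes above introduce: statistics for all target points of a unified scale group are gathered first, reduced to one statistic via unify_statistics, and one set of parameters is registered for every point in the group; those points are then skipped in the regular per-quantizer loop. A condensed, self-contained sketch of that ordering, with illustrative names (unify, make_parameters and the point names are stand-ins, not NNCF API):

```python
from typing import Dict, List, Tuple

Range = Tuple[float, float]  # (min, max) gathered during calibration


def unify(stats: List[Range]) -> Range:
    # Reduce a group's statistics to one shared range (min of mins, max of maxes).
    return min(s[0] for s in stats), max(s[1] for s in stats)


def make_parameters(stat: Range) -> Dict[str, float]:
    # Stand-in for calculate_quantizer_parameters(): derive FQ bounds from a range.
    return {"input_low": stat[0], "input_high": stat[1]}


def apply_quantizers(stats_per_point: Dict[str, Range],
                     unified_groups: List[List[str]]) -> Dict[str, Dict[str, float]]:
    commands: Dict[str, Dict[str, float]] = {}
    unified_points = set()
    # 1) Unified groups first: one statistic, identical parameters for every member.
    for group in unified_groups:
        shared = make_parameters(unify([stats_per_point[p] for p in group]))
        for point in group:
            commands[point] = shared
            unified_points.add(point)
    # 2) Remaining quantizers keep their individually collected statistics.
    for point, stat in stats_per_point.items():
        if point not in unified_points:
            commands[point] = make_parameters(stat)
    return commands


print(apply_quantizers(
    {"concat_in_0": (-1.0, 0.8), "concat_in_1": (-0.4, 1.6), "conv_out": (0.0, 6.0)},
    [["concat_in_0", "concat_in_1"]],
))
```

The set-based skip mirrors unified_ops_list in the diff: a point handled as part of a group must not receive a second, individually calibrated quantizer.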
11 changes: 11 additions & 0 deletions nncf/quantization/algorithms/min_max/backend.py
@@ -26,6 +26,7 @@
from nncf.common.graph.transformations.commands import TransformationCommand
from nncf.common.hardware.config import HWConfig
from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase
from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic
from nncf.common.utils.registry import Registry
from nncf.common.quantization.structs import QuantizerConfig
from nncf.quantization.fake_quantize import FakeQuantizeParameters
@@ -115,6 +116,16 @@ def create_weight_quantizer_insertion_command(nncf_graph: NNCFGraph,
:return: Backend-specific TransformationCommand for the quantizer insertion operation.
"""

@staticmethod
@abstractmethod
def unify_statistics(statistics: List[MinMaxTensorStatistic]) -> MinMaxTensorStatistic:
"""
Returns backend-specific unified statistics.
:param statistics: List of MinMaxTensorStatistic instances.
:return: Unified MinMaxTensorStatistic value.
"""

@staticmethod
@abstractmethod
def minmax_statistic_collector(nncf_graph: NNCFGraph,
13 changes: 12 additions & 1 deletion nncf/quantization/algorithms/min_max/onnx_backend.py
@@ -11,8 +11,8 @@
limitations under the License.
"""

from typing import Dict, List, Tuple, Optional
import numpy as np
from typing import Dict, List, Tuple, Optional

from nncf.parameters import ModelType
from nncf.parameters import TargetDevice
@@ -45,6 +45,7 @@
from nncf.onnx.graph.node_utils import get_input_edges_mapping
from nncf.onnx.statistics.collectors import ONNXMeanMinMaxStatisticCollector
from nncf.onnx.statistics.collectors import ONNXMinMaxStatisticCollector
from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic

from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend
from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS
@@ -108,6 +109,16 @@ def create_weight_quantizer_insertion_command(
onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis)
return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters)

@staticmethod
def unify_statistics(statistics: List[ONNXMinMaxTensorStatistic]) -> ONNXMinMaxTensorStatistic:
max_values, min_values = [], []
for statistic in statistics:
max_values.append(statistic.max_values)
min_values.append(statistic.min_values)
max_values = np.max(max_values, axis=0)
min_values = np.min(min_values, axis=0)
return ONNXMinMaxTensorStatistic(min_values=min_values, max_values=max_values)

@staticmethod
def _get_input_edges_mapping(nncf_graph: NNCFGraph):
return get_input_edges_mapping(nncf_graph)
11 changes: 11 additions & 0 deletions nncf/quantization/algorithms/min_max/torch_backend.py
@@ -47,6 +47,7 @@
from nncf.torch.graph.graph import PTTargetPoint
from nncf.torch.graph.transformations.commands import PTInsertionCommand
from nncf.torch.tensor_statistics.collectors import PTMinMaxStatisticCollector
from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic
from nncf.torch.tensor_statistics.collectors import PTMeanMinMaxStatisticCollector

from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend
@@ -119,6 +120,16 @@ def create_weight_quantizer_insertion_command(
quantizer_config,
parameters)

@staticmethod
def unify_statistics(statistics: List[PTMinMaxTensorStatistic]) -> PTMinMaxTensorStatistic:
max_values, min_values = [], []
for statistic in statistics:
max_values.append(statistic.max_values)
min_values.append(statistic.min_values)
max_values = torch.max(torch.tensor(max_values))
min_values = torch.min(torch.tensor(min_values))
return PTMinMaxTensorStatistic(min_values=min_values, max_values=max_values)

@staticmethod
def minmax_statistic_collector(nncf_graph: NNCFGraph,
target_point: PTTargetPoint,
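
One behavioural detail in the Torch variant above: torch.max(torch.tensor(max_values)) with no dim argument reduces over the whole stacked tensor to a single scalar, while the OpenVINO and ONNX backends reduce elementwise along axis 0. A tiny illustration of the difference (made-up values):

```python
import numpy as np
import torch

# Per-quantizer max statistics for a group of two (made-up values).
group_max = [np.array([0.9, 1.4]), np.array([1.1, 0.6])]

# Elementwise reduction, as in the numpy-based backends: one value per channel.
print(np.max(group_max, axis=0))  # [1.1 1.4]

# Whole-tensor reduction, as in the Torch backend above: a single scalar.
print(torch.max(torch.tensor(np.stack(group_max))))  # tensor(1.4000, dtype=torch.float64)
```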