
Commit

Model thinning: bug fix – aggressive channel/filter pruning raises an exception

* Fix bug: taking the len() of a zero-dimensional ‘indices’ tensor is not legal.
    Use nelement() instead.
    A zero-dim ‘indices’ tensor occurs when the pruning is very aggressive and
    leaves one channel or filter in the tensor (a minimal repro sketch follows this list).
* Protect against pruning of all channels/filters of a layer: Raise ValueError if
  trying to create (through thinning) a Convolution layer with zero channels or filters.
* Tests:
  * Some PEP8 cleanup.
  * Added some test documentation.
  * Refactored some test code to tests/common.py.
  * Added testing of pruning all the channels/filters in a Convolution layer.
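
For context, here is a minimal sketch of the failure mode behind the first bullet. It is not part of the commit; the tensor values are invented and only standard PyTorch calls are used:

import torch

# When pruning leaves exactly one surviving channel or filter, torch.nonzero()
# followed by squeeze() yields a zero-dimensional 'indices' tensor.
indices = torch.nonzero(torch.tensor([0., 7., 0.])).squeeze()
print(indices.dim())        # 0 -- zero-dimensional
print(indices.nelement())   # 1 -- counting elements this way is always legal
try:
    len(indices)            # raises TypeError: len() of a 0-d tensor
except TypeError as err:
    print("len() failed:", err)
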
nzmora committed Jun 26, 2018
1 parent 94d3d51 commit a1cf959
Showing 3 changed files with 162 additions and 80 deletions.
75 changes: 43 additions & 32 deletions distiller/thinning.py
@@ -29,15 +29,13 @@

import math
import logging
import copy
import re
from collections import namedtuple
import torch
from .policy import ScheduledTrainingPolicy
import distiller
from distiller import normalize_module_name, denormalize_module_name
from apputils import SummaryGraph
from models import ALL_MODEL_NAMES, create_model
from models import create_model
msglogger = logging.getLogger()

ThinningRecipe = namedtuple('ThinningRecipe', ['modules', 'parameters'])
@@ -67,7 +65,7 @@
__all__ = ['ThinningRecipe', 'resnet_cifar_remove_layers',
'ChannelRemover', 'remove_channels',
'FilterRemover', 'remove_filters',
'find_nonzero_channels',
'find_nonzero_channels', 'find_nonzero_channels_list',
'execute_thinning_recipes_list']


@@ -177,15 +175,21 @@ def find_nonzero_channels(param, param_name):
k_sums_mat = kernel_sums.view(num_filters, num_channels).t()
nonzero_channels = torch.nonzero(k_sums_mat.abs().sum(dim=1))

if num_channels > len(nonzero_channels):
if num_channels > nonzero_channels.nelement():
msglogger.info("In tensor %s found %d/%d zero channels", param_name,
num_filters - len(nonzero_channels), num_filters)
num_channels - nonzero_channels.nelement(), num_channels)

return nonzero_channels


def find_nonzero_channels_list(param, param_name):
nnz_channels = find_nonzero_channels(param, param_name)
nnz_channels = nnz_channels.view(nnz_channels.numel())
return nnz_channels.cpu().numpy().tolist()
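
A rough usage sketch of the two helpers above (not part of the commit; the weight shape, values, and import path are assumptions for illustration):

import torch
from distiller.thinning import find_nonzero_channels, find_nonzero_channels_list

weight = torch.randn(4, 3, 3, 3)   # 4 filters, 3 input channels
weight[:, 1, :, :] = 0             # zero-out input channel 1 in every filter
print(find_nonzero_channels(weight, 'conv1.weight'))       # index tensor, e.g. [[0], [2]]
print(find_nonzero_channels_list(weight, 'conv1.weight'))  # plain Python list, e.g. [0, 2]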


def apply_and_save_recipe(model, zeros_mask_dict, thinning_recipe):
if len(thinning_recipe.modules)>0 or len(thinning_recipe.parameters)>0:
if len(thinning_recipe.modules) > 0 or len(thinning_recipe.parameters) > 0:
# Now actually remove the filters/channels and make the weight tensors smaller
execute_thinning_recipe(model, zeros_mask_dict, thinning_recipe)

@@ -195,7 +199,7 @@ def apply_and_save_recipe(model, zeros_mask_dict, thinning_recipe):
model.thinning_recipes.append(thinning_recipe)
else:
model.thinning_recipes = [thinning_recipe]
msglogger.info("Created, applied and saved a filter-thinning recipe")
msglogger.info("Created, applied and saved a thinning recipe")
else:
msglogger.error("Failed to create a thinning recipe")

@@ -220,7 +224,7 @@ def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
msglogger.info("Invoking create_thinning_recipe_channels")

thinning_recipe = ThinningRecipe(modules={}, parameters={})
layers = {mod_name : m for mod_name, m in model.named_modules()}
layers = {mod_name: m for mod_name, m in model.named_modules()}

# Traverse all of the model's parameters, search for zero-channels, and
# create a thinning recipe that describes the required changes to the model.
@@ -231,16 +235,18 @@ def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):

num_channels = param.size(1)
nonzero_channels = find_nonzero_channels(param, param_name)

num_nnz_channels = nonzero_channels.nelement()
if num_nnz_channels == 0:
raise ValueError("Trying to set zero channels for parameter %s is not allowed" % param_name)
# If this tensor has no zero channels, continue to the next tensor
if num_channels <= len(nonzero_channels):
if num_channels <= num_nnz_channels:
continue

# We are removing channels, so update the number of incoming channels (IFMs)
# in the convolutional layer
layer_name = param_name_2_layer_name(param_name)
assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
append_module_directive(thinning_recipe, layer_name, key='in_channels', val=len(nonzero_channels))
append_module_directive(thinning_recipe, layer_name, key='in_channels', val=num_nnz_channels)

# Select only the non-zero filters
indices = nonzero_channels.data.squeeze()
@@ -252,7 +258,7 @@ def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
predecessors = [denormalize_module_name(model, predecessor) for predecessor in predecessors]
for predecessor in predecessors:
# For each of the convolutional layers that precede, we have to reduce the number of output channels.
append_module_directive(thinning_recipe, predecessor, key='out_channels', val=len(nonzero_channels))
append_module_directive(thinning_recipe, predecessor, key='out_channels', val=num_nnz_channels)

# Now remove channels from the weights tensor of the successor conv
append_param_directive(thinning_recipe, predecessor+'.weight', (0, indices))
@@ -263,7 +269,8 @@ def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
assert len(bn_layers) == 1
# Thinning of the BN layer that follows the convolution
bn_layer_name = denormalize_module_name(model, bn_layers[0])
bn_thinning(thinning_recipe, layers, bn_layer_name, len_thin_features=len(nonzero_channels), thin_features=indices)
bn_thinning(thinning_recipe, layers, bn_layer_name,
len_thin_features=num_nnz_channels, thin_features=indices)

return thinning_recipe
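
To make the recipe that this function returns more concrete, here is a hypothetical illustration (layer names invented, and assuming append_module_directive/append_param_directive simply accumulate entries in the recipe's two dictionaries) of what gets recorded when only input channels 0 and 2 of a convolution survive:

import torch
from distiller.thinning import ThinningRecipe

recipe = ThinningRecipe(
    # modules: layer name -> {attribute: new value}
    modules={'layer1.conv2': {'in_channels': 2},     # the thinned layer itself
             'layer1.conv1': {'out_channels': 2}},   # its predecessor loses output filters
    # parameters: parameter name -> list of (dimension, indices-to-keep) directives
    parameters={'layer1.conv2.weight': [(1, torch.tensor([0, 2]))],
                'layer1.conv1.weight': [(0, torch.tensor([0, 2]))]})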

@@ -281,7 +288,7 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
msglogger.info("Invoking create_thinning_recipe_filters")

thinning_recipe = ThinningRecipe(modules={}, parameters={})
layers = {mod_name : m for mod_name, m in model.named_modules()}
layers = {mod_name: m for mod_name, m in model.named_modules()}

for param_name, param in model.named_parameters():
# We are only interested in 4D weights
@@ -292,20 +299,22 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
filter_view = param.view(param.size(0), -1)
num_filters = filter_view.size()[0]
nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))

num_nnz_filters = nonzero_filters.nelement()
if num_nnz_filters == 0:
raise ValueError("Trying to set zero filters for parameter %s is not allowed" % param_name)
# If this tensor has no zero filters, continue to the next tensor
if num_filters <= len(nonzero_filters):
if num_filters <= num_nnz_filters:
msglogger.debug("SKipping {} shape={}".format(param_name_2_layer_name(param_name), param.shape))
continue

msglogger.info("In tensor %s found %d/%d zero filters", param_name,
num_filters - len(nonzero_filters), num_filters)
num_filters - num_nnz_filters, num_filters)

# We are removing filters, so update the number of outgoing channels (OFMs)
# in the convolutional layer
layer_name = param_name_2_layer_name(param_name)
assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
append_module_directive(thinning_recipe, layer_name, key='out_channels', val=len(nonzero_filters))
append_module_directive(thinning_recipe, layer_name, key='out_channels', val=num_nnz_filters)

# Select only the non-zero filters
indices = nonzero_filters.data.squeeze()
@@ -323,17 +332,16 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):

if isinstance(layers[successor], torch.nn.modules.Conv2d):
# For each of the convolutional layers that follow, we have to reduce the number of input channels.
append_module_directive(thinning_recipe, successor, key='in_channels', val=len(nonzero_filters))
msglogger.info("[recipe] {}: setting in_channels = {}".format(successor, len(nonzero_filters)))
append_module_directive(thinning_recipe, successor, key='in_channels', val=num_nnz_filters)
msglogger.info("[recipe] {}: setting in_channels = {}".format(successor, num_nnz_filters))

# Now remove channels from the weights tensor of the successor conv
append_param_directive(thinning_recipe, successor+'.weight', (1, indices))

elif isinstance(layers[successor], torch.nn.modules.Linear):
# If a Linear (Fully-Connected) layer follows, we need to update its in_features member
fm_size = layers[successor].in_features // layers[layer_name].out_channels
in_features = fm_size * len(nonzero_filters)
#append_module_directive(thinning_recipe, layer_name, key='in_features', val=in_features)
in_features = fm_size * num_nnz_filters
append_module_directive(thinning_recipe, successor, key='in_features', val=in_features)
msglogger.info("[recipe] {}: setting in_features = {}".format(successor, in_features))

@@ -350,7 +358,8 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
assert len(bn_layers) == 1
# Thinning of the BN layer that follows the convolution
bn_layer_name = denormalize_module_name(model, bn_layers[0])
bn_thinning(thinning_recipe, layers, bn_layer_name, len_thin_features=len(nonzero_filters), thin_features=indices)
bn_thinning(thinning_recipe, layers, bn_layer_name,
len_thin_features=num_nnz_filters, thin_features=indices)
return thinning_recipe
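
A small worked example of the Linear in_features arithmetic used above when a convolution feeds a fully-connected layer (the sizes are invented for illustration):

out_channels = 64         # conv filters before thinning
in_features = 3136        # Linear input features = 64 filters * 7*7 feature map
num_nnz_filters = 40      # filters that survive pruning

fm_size = in_features // out_channels       # 49 elements contributed per filter
new_in_features = fm_size * num_nnz_filters
print(new_in_features)                      # 1960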


@@ -423,8 +432,9 @@ def execute_thinning_recipe(model, zeros_mask_dict, recipe, loaded_from_file=Fal
dim_to_trim = val[0]
indices_to_select = val[1]
# Check if we're trying to trim a parameter that is already "thin"
if running.size(dim_to_trim) != len(indices_to_select):
msglogger.info("[thinning] {}: setting {} to {}".format(layer_name, attr, len(indices_to_select)))
if running.size(dim_to_trim) != indices_to_select.nelement():
msglogger.info("[thinning] {}: setting {} to {}".
format(layer_name, attr, indices_to_select.nelement()))
setattr(layers[layer_name], attr,
torch.index_select(running, dim=dim_to_trim, index=indices_to_select))
else:
@@ -438,34 +448,35 @@ def execute_thinning_recipe(model, zeros_mask_dict, recipe, loaded_from_file=Fal
for directive in param_directives:
dim = directive[0]
indices = directive[1]
len_indices = indices.nelement()
if len(directive) == 4: # TODO: this code is hard to follow
selection_view = param.view(*directive[2])
# Check if we're trying to trim a parameter that is already "thin"
if param.data.size(dim) != len(indices):
if param.data.size(dim) != len_indices:
param.data = torch.index_select(selection_view, dim, indices)

if param.grad is not None:
# We also need to change the dimensions of the gradient tensor.
grad_selection_view = param.grad.resize_(*directive[2])
if grad_selection_view.size(dim) != len(indices):
if grad_selection_view.size(dim) != len_indices:
param.grad = torch.index_select(grad_selection_view, dim, indices)

param.data = param.view(*directive[3])
if param.grad is not None:
param.grad = param.grad.resize_(*directive[3])
else:
if param.data.size(dim) != len(indices):
if param.data.size(dim) != len_indices:
param.data = torch.index_select(param.data, dim, indices)
# We also need to change the dimensions of the gradient tensor.
# If we have not done a backward-pass thus far, then the gradient will
# not exist, and therefore won't need to be re-dimensioned.
if param.grad is not None and param.grad.size(dim) != len(indices):
if param.grad is not None and param.grad.size(dim) != len_indices:
param.grad = torch.index_select(param.grad, dim, indices)
msglogger.info("[thinning] changing param {} shape: {}".format(param_name, len(indices)))
msglogger.info("[thinning] changing param {} shape: {}".format(param_name, len_indices))

if not loaded_from_file:
# If the masks are loaded from a checkpoint file, then we don't need to change
# their shape, because they are already correctly shaped
mask = zeros_mask_dict[param_name].mask
if mask is not None and (mask.size(dim) != len(indices)):
if mask is not None and (mask.size(dim) != len_indices):
zeros_mask_dict[param_name].mask = torch.index_select(mask, dim, indices)
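
The heavy lifting in execute_thinning_recipe is torch.index_select along the dimension stored in each directive; a standalone sketch of that mechanism (shapes and indices invented):

import torch

weight = torch.randn(10, 3, 5, 5)      # conv weight with 10 filters
keep = torch.tensor([0, 2, 5, 9])      # indices of the filters that remain non-zero
thin = torch.index_select(weight, dim=0, index=keep)
print(thin.shape)                      # torch.Size([4, 3, 5, 5])
# Using dim=1 instead would drop input channels, which is how the
# (dimension, indices) directives in a recipe's 'parameters' dict are applied.
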
42 changes: 42 additions & 0 deletions tests/common.py
@@ -0,0 +1,42 @@
#
# Copyright (c) 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
sys.path.append(module_path)
import distiller
from models import create_model


def setup_test(arch, dataset):
model = create_model(False, dataset, arch, parallel=False)
assert model is not None

# Create the masks
zeros_mask_dict = {}
for name, param in model.named_parameters():
masker = distiller.ParameterMasker(name)
zeros_mask_dict[name] = masker
return model, zeros_mask_dict


def find_module_by_name(model, module_to_find):
for name, m in model.named_modules():
if name == module_to_find:
return m
return None
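
A hypothetical snippet showing how a test might use the two new helpers together (the architecture, dataset, and module name are placeholders, not taken from this commit):

# Run from the tests/ directory so that common.py is importable.
from common import setup_test, find_module_by_name

model, zeros_mask_dict = setup_test('resnet20_cifar', 'cifar10')
conv1 = find_module_by_name(model, 'conv1')   # returns None if no module has this name
print(type(conv1), len(zeros_mask_dict))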
