diff --git a/examples/torch/classification/README.md b/examples/torch/classification/README.md
index 9b14683add4..fed496581bc 100644
--- a/examples/torch/classification/README.md
+++ b/examples/torch/classification/README.md
@@ -126,7 +126,7 @@ As an example of NNCF convolution binarization capabilities, you may use the con
 |ResNet-18|Filter pruning, 40%, magnitude criterion|ImageNet|69.26 (0.54)|2.75 (75.75%)|9.23 (79.02%)|[Link](configs/pruning/resnet18_pruning_magnitude.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/torch/resnet18_imagenet_filter_pruning_magnitude.pth)|
 |ResNet-18|Filter pruning, 40%, geometric median criterion|ImageNet|69.32 (0.48)|2.75 (75.75%)|9.23 (79.02%)|[Link](configs/pruning/resnet18_pruning_geometric_median.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/torch/resnet18_imagenet_filter_pruning_geomean.pth)|
 |ResNet-34|None|ImageNet|73.26|7.33 (100%)|21.78 (100%)|[Link](configs/pruning/resnet34_imagenet.json)|-|
-|ResNet-34|Filter pruning, 40%, geometric median criterion|ImageNet|72.72 (0.54)|5.06 (69.03%)|15.47 (71.03%)|[Link](configs/pruning/resnet34_pruning_geometric_median.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/torch/resnet34_imagenet_filter_pruning_geomean.pth)|
+|ResNet-34|Filter pruning, 50%, geometric median criterion + KD|ImageNet|73.11 (0.15)|4.32 (58.96%)|13.56 (62.25%)|[Link](configs/pruning/resnet34_pruning_geometric_median_kd.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/torch/resnet34_imagenet_filter_pruning_geomean_kd.pth)|
 |GoogLeNet|None|ImageNet|69.72|2.99 (100%)|6.61 (100%)|[Link](configs/pruning/googlenet_imagenet.json)|-|
 |GoogLeNet|Filter pruning, 40%, geometric median criterion|ImageNet|68.89 (0.83)|1.36 (45.48%)|3.47 (52.50%)|[Link](configs/pruning/googlenet_pruning_geometric_median.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/torch/googlenet_imagenet_filter_pruning_geomean.pth)|
diff --git a/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median.json b/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median.json
deleted file mode 100644
index c9a3c1ac5b1..00000000000
--- a/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-    "model": "resnet34",
-    "pretrained": true,
-    "batch_size" : 512,
-    "epochs": 100,
-    "input_info": {
-        "sample_size": [1, 3, 224, 224]
-    },
-    "optimizer": {
-        "type": "SGD",
-        "base_lr": 0.1,
-        "weight_decay": 1e-4,
-        "schedule_type": "multistep",
-        "steps": [
-            20,
-            40,
-            60,
-            80
-        ],
-        "optimizer_params":
-        {
-            "momentum": 0.9,
-            "nesterov": true
-        }
-    },
-    "compression": [
-        {
-            "algorithm": "filter_pruning",
-            "pruning_init": 0.1,
-            "params": {
-                "schedule": "exponential",
-                "pruning_target": 0.4,
-                "pruning_steps": 15,
-                "filter_importance": "geometric_median"
-            }
-        }
-    ]
-}
\ No newline at end of file
diff --git a/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median_kd.json b/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median_kd.json
index 6e5e3bc6b96..c8b27d2e736 100644
--- a/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median_kd.json
+++ b/examples/torch/classification/configs/pruning/resnet34_pruning_geometric_median_kd.json
@@ -12,9 +12,9 @@
         "weight_decay": 1e-4,
         "schedule_type": "multistep",
         "steps": [
-            20,
-            40,
-            60,
+            25,
+            45,
+            65,
             80
         ],
         "optimizer_params":
@@ -29,8 +29,8 @@
             "pruning_init": 0.1,
             "params": {
                 "schedule": "exponential",
-                "pruning_target": 0.4,
-                "pruning_steps": 15,
+                "pruning_target": 0.5,
+                "pruning_steps": 20,
                 "filter_importance": "geometric_median"
             }
         },
@@ -39,4 +39,4 @@
             "type": "softmax" // or mse
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch b/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch
index 21a237422ab..22797f933fa 100644
--- a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch
+++ b/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch
@@ -1,6 +1,6 @@
-From 8e5d5b5798febec45bb442e96f2633ac1c2bf6ba Mon Sep 17 00:00:00 2001
-From: Vasily Shamporov
-Date: Mon, 2 Aug 2021 17:46:48 +0300
+From d333a563fd28c538f93217af3d7556141d19d51b Mon Sep 17 00:00:00 2001
+From: skholkin
+Date: Fri, 4 Mar 2022 14:03:05 +0300
 Subject: [PATCH] Modifications for NNCF usage
 
 ---
@@ -12,9 +12,10 @@ Subject: [PATCH] Modifications for NNCF usage
  nncf_bert_config_conll.json                   |  44 +++++++
  nncf_bert_config_mrpc.json                    |  42 +++++++
  nncf_bert_config_squad.json                   |  44 +++++++
+ nncf_bert_config_squad_kd.json                |  50 ++++++++
  ...config_squad_magnitude_sparsity_cubic.json |  31 +++++
  nncf_bert_config_xnli.json                    |  36 ++++++
- nncf_distilbert_config_sst2.json              |  33 +++++
+ nncf_distilbert_config_sst2.json              |  34 +++++
  nncf_gpt2_config_wikitext_hw_config.json      |  58 +++++++++
  nncf_mobilebert_config_squad_int8.json        |  46 +++++++
  nncf_roberta_config_mnli.json                 |  36 ++++++
@@ -23,10 +24,11 @@ Subject: [PATCH] Modifications for NNCF usage
  src/transformers/trainer.py                   |  74 +++++++++--
  src/transformers/trainer_callback.py          |   2 +
  src/transformers/training_args.py             |   6 +
- 19 files changed, 842 insertions(+), 73 deletions(-)
+ 20 files changed, 893 insertions(+), 73 deletions(-)
  create mode 100644 nncf_bert_config_conll.json
  create mode 100644 nncf_bert_config_mrpc.json
  create mode 100644 nncf_bert_config_squad.json
+ create mode 100644 nncf_bert_config_squad_kd.json
  create mode 100644 nncf_bert_config_squad_magnitude_sparsity_cubic.json
  create mode 100644 nncf_bert_config_xnli.json
  create mode 100644 nncf_distilbert_config_sst2.json
@@ -851,6 +853,62 @@ index 000000000..12e0440f7
 +        }
 +    }
 +}
+diff --git a/nncf_bert_config_squad_kd.json b/nncf_bert_config_squad_kd.json
+new file mode 100644
+index 000000000..f5872a0a2
+--- /dev/null
++++ b/nncf_bert_config_squad_kd.json
+@@ -0,0 +1,50 @@
++{
++    "input_info": [
++        {
++            "sample_size": [1, 384],
++            "type": "long"
++        },
++        {
++            "sample_size": [1, 384],
++            "type": "long"
++        },
++        {
++            "sample_size": [1, 384],
++            "type": "long"
++        }
++    ],
++    "compression": [{
++    "algorithm": "quantization",
++    "initializer": {
++        "range": {
++            "num_init_samples": 32,
++            "type": "percentile",
++            "params":
++            {
++                "min_percentile": 0.01,
++                "max_percentile": 99.99
++            }
++        },
++
++        "batchnorm_adaptation": {
++            "num_bn_adaptation_samples": 200
++        }
++    },
++    "activations":
++    {
++        "mode": "symmetric"
++    },
++    "weights":
++    {
++        "mode": "symmetric",
++        "signed": true,
++        "per_channel": false
++    }
++    },
++    {
++        "algorithm": "knowledge_distillation",
++        "type": "softmax",
++        "temperature": 3
++    }
++    ]
++}
 diff --git a/nncf_bert_config_squad_magnitude_sparsity_cubic.json b/nncf_bert_config_squad_magnitude_sparsity_cubic.json
 new file mode 100644
 index 000000000..b4452e8d4
@@ -932,10 +990,10 @@ index 000000000..a21a522fc
 +}
 diff --git a/nncf_distilbert_config_sst2.json b/nncf_distilbert_config_sst2.json
 new file mode 100644
-index 000000000..6b648ca5e
+index 000000000..868735016
 --- /dev/null
 +++ b/nncf_distilbert_config_sst2.json
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,34 @@
 +{
 +    "input_info": [
 +        {
 +            "sample_size": [1, 128],
 +            "type": "long"
 +        }
 +    ],
 +    "compression": [{
 +    "algorithm": "quantization",
 +    "initializer": {
 +        "range": {
-+            "num_init_samples": 32
++            "num_init_samples": 32,
++            "type": "mean_percentile"
 +        }
 +    },
 +    "ignored_scopes": [
diff --git a/third_party_integration/huggingface_transformers/README.md b/third_party_integration/huggingface_transformers/README.md
index cad7458d0b4..8d2b6e66a57 100644
--- a/third_party_integration/huggingface_transformers/README.md
+++ b/third_party_integration/huggingface_transformers/README.md
@@ -43,12 +43,17 @@ _INT8 model (symmetric weights, asymmetric activations quantization)_ - 77.22% a
 
 _Full-precision FP32 baseline model_ - bert-large-uncased-whole-word-masking model, trained on SQuAD v1.1 - 93.21% F1, 87.2% EM on the dev set,
 
-_INT8 model (symmetric quantization)_ - 92.60% F1, 86.36% EM on the dev set.
+_INT8 model (symmetric quantization)_ - 92.55% F1, 86.1% EM on the dev set.
 
 **INT8 model quantization-aware training command line (trained on 4x Tesla V100):**
 
-`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=6 --save_steps=400 --nncf_config nncf_bert_config_squad.json`
+`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad.json`
 
+_INT8 model (symmetric quantization) + Knowledge Distillation_ - 92.89% F1, 86.68% EM on the dev set.
+
+**INT8 model quantization-aware training + Knowledge Distillation command line (trained on 4x Tesla V100):**
+
+`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad_kd.json`
 
 **Fine-tuned INT8 model evaluation and ONNX export command line:**
 
@@ -105,7 +110,7 @@ _INT8 model (asymmetrically quantized)_ - 89.25% accuracy (matched), 88.9% accur
 
 _Full-precision FP32 baseline model_ - distilbert-base-uncased-finetuned-sst-2-english, pre-trained on SST-2 - 91.1% accuracy
 
-_INT8 model (symmetrically quantized)_ - 90.3% accuracy
+_INT8 model (symmetrically quantized)_ - 90.94% accuracy
 
 **INT8 model quantization-aware training command line:**
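
For reference, the JSON files touched by this diff are ordinary NNCF configuration files. The sketch below shows roughly how a pruning-plus-knowledge-distillation config like `resnet34_pruning_geometric_median_kd.json` is consumed; it is a minimal illustration assuming the NNCF 2.x PyTorch entry points (`NNCFConfig`, `register_default_init_args`, `create_compressed_model`). The random data loader is a stand-in for ImageNet, and only the `compression` section of the config is inlined here; the `model`, `optimizer`, `batch_size`, and `epochs` keys in the example configs are read by the sample training scripts, not by NNCF itself.

```python
import torch
import torchvision

from nncf import NNCFConfig
from nncf.torch import create_compressed_model, register_default_init_args

# Stand-in data loader (the real config trains on ImageNet with batch size 512).
dataset = torch.utils.data.TensorDataset(
    torch.randn(8, 3, 224, 224), torch.randint(0, 1000, (8,))
)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=4)

model = torchvision.models.resnet34()  # the example config additionally requests pretrained weights

# Inlined subset of resnet34_pruning_geometric_median_kd.json: filter pruning + softmax KD.
nncf_config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 3, 224, 224]},
    "compression": [
        {
            "algorithm": "filter_pruning",
            "pruning_init": 0.1,
            "params": {
                "schedule": "exponential",
                "pruning_target": 0.5,
                "pruning_steps": 20,
                "filter_importance": "geometric_median",
            },
        },
        {"algorithm": "knowledge_distillation", "type": "softmax"},
    ],
})
# Attach a loader so default initializers (e.g. batchnorm adaptation) can run.
nncf_config = register_default_init_args(nncf_config, train_loader)

# Wrap the model; the controller exposes the compression loss and the pruning scheduler.
compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(compressed_model.parameters(), lr=0.1, momentum=0.9)

compression_ctrl.scheduler.epoch_step()  # advance the exponential pruning schedule once per epoch
for images, targets in train_loader:
    optimizer.zero_grad()
    outputs = compressed_model(images)
    # The distillation term from the "knowledge_distillation" algorithm is added via loss().
    loss = criterion(outputs, targets) + compression_ctrl.loss()
    loss.backward()
    optimizer.step()
    compression_ctrl.scheduler.step()
```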