fix flake8 issues (facebookresearch#2570)
Summary:
# Before submitting

- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

## What does this PR do?
- [x] Applies flake8 fixes to the main branch (fairinternal/fairseq-py#2546); more issues remain to be fixed.

Fix GPU tests:
- [x] When the `torch.ao.quantization` import fails, fall back to `torch.quantization` (a minimal sketch of the pattern follows this list)
- [x] Build apex from an earlier commit in CircleCI so that it's compatible with PyTorch 1.8 and 1.9
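A minimal sketch of that import fallback, mirroring the pattern the diffs below apply in `emformer.py` and `quantization/scalar/ops.py`:

```python
# Prefer the torch.ao.quantization namespace (PyTorch >= 1.10); fall back to
# torch.quantization on older releases such as 1.8 and 1.9.
try:
    import torch.ao.quantization as quantization
except ImportError:
    import torch.quantization as quantization

# Downstream code refers only to the `quantization` alias, e.g.
# quantization.quantize_dynamic(...), so the same line runs on either version.
```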

## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.

## Did you have fun?
Make sure you had fun coding 🙃

Pull Request resolved: fairinternal/fairseq-py#2570

Reviewed By: Mortimerp9

Differential Revision: D32955312

Pulled By: dianaml0

fbshipit-source-id: e163cbd4998f171f819e31b0682c1c0f1986f9e1
dianaml0 authored and facebook-github-bot committed Dec 9, 2021
1 parent c620ed0 commit 88e7d25
Showing 22 changed files with 73 additions and 61 deletions.
5 changes: 3 additions & 2 deletions .circleci/config.yml
@@ -10,7 +10,7 @@ gpu: &gpu
machine:
image: ubuntu-1604-cuda-11.1:202012-01
resource_class: gpu.nvidia.medium.multi


# -------------------------------------------------------------------------------------
# Re-usable commands
@@ -25,7 +25,7 @@ install_dep_common: &install_dep_common
pip install --upgrade setuptools
pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
pip install --progress-bar off pytest
pip install --progress-bar off fairscale==0.4.1
pip install --progress-bar off fairscale
pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
python -c 'import torch; print("Torch version:", torch.__version__)'
python -m torch.utils.collect_env
@@ -38,6 +38,7 @@ install_dep_fused_ops: &install_dep_fused_ops
source activate fairseq
git clone https://github.com/NVIDIA/apex
cd apex
git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
cd ~/
git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
- id: end-of-file-fixer

- repo: https://github.com/ambv/black
rev: 20.8b1
rev: 21.12b0
hooks:
- id: black
language_version: python3.8
2 changes: 0 additions & 2 deletions fairseq/logging/meters.py
@@ -8,7 +8,6 @@
from collections import OrderedDict
from typing import Dict, Optional


try:
import torch

@@ -18,7 +17,6 @@ def type_as(a, b):
else:
return a


except ImportError:
torch = None

37 changes: 22 additions & 15 deletions fairseq/models/speech_to_text/modules/emformer.py
@@ -14,23 +14,30 @@

import torch
import torch.nn as nn
from fairseq.models import (
FairseqEncoder,
)
from torch import Tensor
from torch import device as Device

from fairseq.models import FairseqEncoder
from fairseq.models.speech_to_text.utils import (
NoOp,
lengths_to_padding_mask,
segments_to_sequence,
)
from fairseq.models.speech_to_text.utils import (
attention_suppression,
layer_norm_backward_hook,
lengths_to_padding_mask,
segments_to_sequence,
)
from torch import Tensor, device as Device
from torch.ao.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)

try:
import torch.ao.quantization as quantization
from torch.ao.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)
except ImportError:
import torch.quantization as quantization
from torch.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)


class RelativePositionEmbedding(nn.Module):
@@ -140,7 +147,7 @@ def quantize_(self, params=None):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self
@@ -728,7 +735,7 @@ def quantize_(self, params=None):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self
@@ -1771,7 +1778,7 @@ def quantize_(self, params=None):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self
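For context, a self-contained sketch of the dynamic-quantization call that the `quantize_` methods above wrap; the toy model here is illustrative, not from fairseq:

```python
import torch
import torch.nn as nn

try:
    import torch.ao.quantization as quantization
    from torch.ao.quantization.qconfig import per_channel_dynamic_qconfig
except ImportError:
    import torch.quantization as quantization
    from torch.quantization.qconfig import per_channel_dynamic_qconfig

# A stand-in model; emformer's quantize_ applies the same call to itself.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 8))

# Swap every nn.Linear for its dynamically quantized (qint8) counterpart, in place.
quantization.quantize_dynamic(
    model, {nn.Linear: per_channel_dynamic_qconfig}, dtype=torch.qint8, inplace=True
)
print(model)  # the Linear layers are now dynamic-quantized modules
```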
2 changes: 0 additions & 2 deletions fairseq/modules/cross_entropy.py
@@ -8,7 +8,6 @@
import torch
import torch.nn.functional as F


logger = logging.getLogger(__name__)


@@ -54,7 +53,6 @@ def cross_entropy(logits, target, ignore_index=-100, reduction="mean"):
else:
raise NotImplementedError


except ImportError:

def cross_entropy(logits, target, ignore_index=-100, reduction="mean"):
2 changes: 0 additions & 2 deletions fairseq/modules/layer_norm.py
@@ -7,7 +7,6 @@
import torch.nn as nn
import torch.nn.functional as F


try:
from apex.normalization import FusedLayerNorm as _FusedLayerNorm

@@ -22,7 +21,6 @@ def forward(self, x):
with torch.cuda.device(x.device):
return super().forward(x)


except ImportError:
has_fused_layernorm = False

11 changes: 8 additions & 3 deletions fairseq/modules/quantization/scalar/ops.py
@@ -5,6 +5,11 @@

import torch

try:
import torch.ao.quantization as quantization
except ImportError:
import torch.quantization as quantization


def emulate_int(w, bits, method, scale=None, zero_point=None):
q = globals()[f"emulate_int8_{method}"]
@@ -21,7 +26,7 @@ def quantize(w, scale, zero_point, bits=8):

def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.HistogramObserver()
obs = quantization.observer.HistogramObserver()
obs.to(device=w.device)
_ = obs(w.float())
scale, zero_point = obs.calculate_qparams()
@@ -32,7 +37,7 @@ def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):

def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.PerChannelMinMaxObserver(
obs = quantization.observer.PerChannelMinMaxObserver(
ch_axis=-1, qscheme=torch.per_channel_symmetric
)
obs.to(device=w.device)
@@ -45,7 +50,7 @@ def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):

def emulate_int8_tensor(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.MinMaxObserver()
obs = quantization.observer.MinMaxObserver()
obs.to(device=w.device)
_ = obs(w)
scale, zero_point = obs.calculate_qparams()
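A short sketch of what the observers in `ops.py` compute, using only stock PyTorch (the random tensor is illustrative):

```python
import torch

try:
    import torch.ao.quantization as quantization
except ImportError:
    import torch.quantization as quantization

w = torch.randn(64, 64)

# The observer records the tensor's value distribution on a forward pass...
obs = quantization.observer.HistogramObserver()
_ = obs(w.float())

# ...then derives the affine int8 mapping used above: q = round(w / scale) + zero_point.
scale, zero_point = obs.calculate_qparams()
print(float(scale), int(zero_point))
```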
5 changes: 2 additions & 3 deletions fairseq/optim/fused_adam.py
@@ -27,8 +27,8 @@ def get_fused_adam_class():
except ImportError:
try:
# fallback to the newer interface
from apex.optimizers import FusedAdam as _FusedAdam # noqa
from apex.multi_tensor_apply import multi_tensor_applier
from apex.optimizers import FusedAdam as _FusedAdam # noqa

if multi_tensor_applier.available:
return FusedAdamV2
@@ -252,8 +252,8 @@ def inf_norm(t):


try:
from apex.optimizers import FusedAdam
from apex.multi_tensor_apply import multi_tensor_applier
from apex.optimizers import FusedAdam

class FusedAdamV2(FusedAdam):
"""
@@ -382,6 +382,5 @@ def step(

return loss


except ImportError:
pass
14 changes: 8 additions & 6 deletions scripts/average_checkpoints.py
@@ -108,16 +108,18 @@ def main():
help='Write the new checkpoint containing the averaged weights to this path.')
num_group = parser.add_mutually_exclusive_group()
num_group.add_argument('--num-epoch-checkpoints', type=int,
help='if set, will try to find checkpoints with names checkpoint_xx.pt in the path specified by input, '
'and average last this many of them.')
help='if set, will try to find checkpoints with names checkpoint_xx.pt in the '
'path specified by input, and average last this many of them.')
num_group.add_argument('--num-update-checkpoints', type=int,
help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by input, '
'and average last this many of them.')
help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by'
' input, and average last this many of them.')
parser.add_argument('--checkpoint-upper-bound', type=int,
help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, '
'when using --num-update-checkpoints, this will set an upper bound on which update to use'
'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be averaged.'
'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would be averaged assuming --save-interval-updates 500'
'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be'
' averaged.'
'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would'
' be averaged assuming --save-interval-updates 500'
)
# fmt: on
args = parser.parse_args()
2 changes: 0 additions & 2 deletions scripts/constraints/extract.py
@@ -11,8 +11,6 @@
import random
import sys

from sacrebleu import extract_ngrams


def get_phrase(words, index, length):
assert index < len(words) - length + 1
8 changes: 4 additions & 4 deletions scripts/spm_decode.py
@@ -26,13 +26,13 @@ def main():

if args.input_format == "piece":

def decode(l):
return "".join(sp.DecodePieces(l))
def decode(input):
return "".join(sp.DecodePieces(input))

elif args.input_format == "id":

def decode(l):
return "".join(sp.DecodeIds(l))
def decode(input):
return "".join(sp.DecodeIds(input))

else:
raise NotImplementedError
8 changes: 4 additions & 4 deletions scripts/spm_encode.py
@@ -49,13 +49,13 @@ def main():

if args.output_format == "piece":

def encode(l):
return sp.EncodeAsPieces(l)
def encode(input):
return sp.EncodeAsPieces(input)

elif args.output_format == "id":

def encode(l):
return list(map(str, sp.EncodeAsIds(l)))
def encode(input):
return list(map(str, sp.EncodeAsIds(input)))

else:
raise NotImplementedError
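The `l` → `input` renames in `spm_decode.py` and `spm_encode.py` above (and in `tests/test_multi_corpus_sampled_dataset.py` below) fix flake8's E741 warning, which flags identifiers easily mistaken for `1` or `I`. A tiny illustration with hypothetical function names:

```python
def join_pieces(l):  # flake8 reports: E741 ambiguous variable name 'l'
    return "".join(l)

def join_pieces_fixed(pieces):  # a descriptive name passes cleanly
    return "".join(pieces)
```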
1 change: 1 addition & 0 deletions setup.cfg
@@ -1,3 +1,4 @@
[flake8]
max-line-length = 127
extend-ignore = E203, W503
extend-exclude = fairseq/model_parallel/megatron
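For context: E203 and W503 are commonly ignored when flake8 runs alongside black, since black's output can trigger both. Illustrative snippets (not from the repository):

```python
gross_wages = 1000
taxable_interest = 50

# W503: black breaks long lines *before* binary operators, which default
# flake8 reports as "line break before binary operator".
income = (gross_wages
          + taxable_interest)

items = list(range(10))
offset = 3

# E203: black inserts a space before ":" in slices with complex bounds,
# which default flake8 reports as "whitespace before ':'".
chunk = items[offset + 1 :]
```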
1 change: 0 additions & 1 deletion tests/distributed/test_bmuf.py
@@ -140,7 +140,6 @@ def setup_args():
@unittest.skipIf(torch.cuda.device_count() < 2, "test requires 2 GPUs")
class TestBMUF(unittest.TestCase):
def bmuf_process(self, cfg, args, iterations):
processes = []
results = Manager().dict()
torch.multiprocessing.spawn(
fn=functools.partial(single_gpu_training, cfg, args),
3 changes: 3 additions & 0 deletions tests/gpu/test_binaries_gpu.py
@@ -399,6 +399,9 @@ def _quantize_language_model(data_dir, arch, extra_flags=None, run_validation=False):
train.main(quantize_args)


@unittest.skipIf(
int(torch.__version__[2]) < 10, reason="quantized kernels are only supported on CPU"
)
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestQuantization(unittest.TestCase):
def setUp(self):
12 changes: 9 additions & 3 deletions tests/test_constraints.py
@@ -3,11 +3,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import sys
import unittest
from typing import List

import torch
from fairseq.token_generation_constraints import *

from fairseq.token_generation_constraints import (
ConstraintNode,
OrderedConstraintState,
UnorderedConstraintState,
pack_constraints,
)


def tensorize(constraints: List[List[int]]) -> torch.Tensor:
@@ -53,7 +59,7 @@ def setUp(self):
self.examples = [
(
tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
"([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
"([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))", # noqa
{1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
),
([], "[None].False#0", {}),
2 changes: 1 addition & 1 deletion tests/test_file_io.py
@@ -49,7 +49,7 @@ def test_file_io_oss(self):
def test_file_io_async(self):
# ioPath `PathManager` is initialized after the first `opena` call.
try:
from fairseq.file_io import IOPathManager, PathManager
from fairseq.file_io import PathManager

_asyncfile = os.path.join(self._tmpdir, "async.txt")
f = PathManager.opena(_asyncfile, "wb")
1 change: 0 additions & 1 deletion tests/test_fp16_optimizer.py
@@ -3,7 +3,6 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import copy
import logging
import unittest
2 changes: 1 addition & 1 deletion tests/test_multi_corpus_sampled_dataset.py
@@ -79,7 +79,7 @@ def test_multi_corpus_sampled_dataset_uniform_sample(self):

def test_multi_corpus_sampled_dataset_weighted_sample(self):
def naive_weighted_sample(weights):
def f(l):
def f(input):
v = np.random.random()
agg = 0
for i, weight in enumerate(weights):
2 changes: 0 additions & 2 deletions tests/test_reproducibility.py
@@ -3,12 +3,10 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import contextlib
import json
import os
import tempfile
import unittest
from io import StringIO

import torch
