Skip to content

Commit

Permalink
Set usedforsecurity=False in hashlib methods (FIPS compliance) (huggingface#27483)
Browse files Browse the repository at this point in the history

* Set usedforsecurity=False in hashlib methods (FIPS compliance)

* trigger ci

* tokenizers version

* deps

* bump hfh version

* let's try this
  • Loading branch information
Wauplin authored Nov 16, 2023
1 parent 5603fad commit fd65aa9
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import gzip
import hashlib
import json
import multiprocessing
import os
Expand All @@ -11,6 +10,7 @@
import numpy as np
from arguments import PreprocessingArguments
from datasets import load_dataset
from huggingface_hub.utils import insecure_hashlib
from minhash_deduplication import deduplicate_dataset

from transformers import AutoTokenizer, HfArgumentParser
Expand All @@ -21,7 +21,7 @@

def get_hash(example):
"""Get hash of content field."""
return {"hash": hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()}
return {"hash": insecure_hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()}


def line_stats(example):
Expand Down
6 changes: 3 additions & 3 deletions examples/research_projects/lxmert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from collections import OrderedDict
from contextlib import contextmanager
from functools import partial
from hashlib import sha256
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
Expand All @@ -39,6 +38,7 @@
import requests
import wget
from filelock import FileLock
from huggingface_hub.utils import insecure_hashlib
from PIL import Image
from tqdm.auto import tqdm
from yaml import Loader, dump, load
Expand Down Expand Up @@ -402,12 +402,12 @@ def _resumable_file_manager():

def url_to_filename(url, etag=None):
url_bytes = url.encode("utf-8")
url_hash = sha256(url_bytes)
url_hash = insecure_hashlib.sha256(url_bytes)
filename = url_hash.hexdigest()

if etag:
etag_bytes = etag.encode("utf-8")
etag_hash = sha256(etag_bytes)
etag_hash = insecure_hashlib.sha256(etag_bytes)
filename += "." + etag_hash.hexdigest()

if url.endswith(".h5"):
Expand Down
6 changes: 3 additions & 3 deletions examples/research_projects/visual_bert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from collections import OrderedDict
from contextlib import contextmanager
from functools import partial
from hashlib import sha256
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
Expand All @@ -39,6 +38,7 @@
import requests
import wget
from filelock import FileLock
from huggingface_hub.utils import insecure_hashlib
from PIL import Image
from tqdm.auto import tqdm
from yaml import Loader, dump, load
Expand Down Expand Up @@ -402,12 +402,12 @@ def _resumable_file_manager():

def url_to_filename(url, etag=None):
url_bytes = url.encode("utf-8")
url_hash = sha256(url_bytes)
url_hash = insecure_hashlib.sha256(url_bytes)
filename = url_hash.hexdigest()

if etag:
etag_bytes = etag.encode("utf-8")
etag_hash = sha256(etag_bytes)
etag_hash = insecure_hashlib.sha256(etag_bytes)
filename += "." + etag_hash.hexdigest()

if url.endswith(".h5"):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
"fugashi>=1.0",
"GitPython<3.1.19",
"hf-doc-builder>=0.3.0",
"huggingface-hub>=0.16.4,<1.0",
"huggingface-hub>=0.19.3,<1.0",
"importlib_metadata",
"ipadic>=1.0.0,<2.0",
"isort>=5.5.4",
Expand Down Expand Up @@ -321,6 +321,7 @@ def run(self):
"rjieba",
"beautifulsoup4",
"tensorboard",
"pydantic",
)
+ extras["retrieval"]
+ extras["modelcreation"]
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/dependency_versions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"fugashi": "fugashi>=1.0",
"GitPython": "GitPython<3.1.19",
"hf-doc-builder": "hf-doc-builder>=0.3.0",
"huggingface-hub": "huggingface-hub>=0.16.4,<1.0",
"huggingface-hub": "huggingface-hub>=0.19.3,<1.0",
"importlib_metadata": "importlib_metadata",
"ipadic": "ipadic>=1.0.0,<2.0",
"isort": "isort>=5.5.4",
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/whisper/convert_openai_to_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# limitations under the License.

import argparse
import hashlib
import io
import json
import os
Expand All @@ -24,6 +23,7 @@
import warnings

import torch
from huggingface_hub.utils import insecure_hashlib
from torch import nn
from tqdm import tqdm

Expand Down Expand Up @@ -114,7 +114,7 @@ def _download(url: str, root: str) -> io.BytesIO:

if os.path.isfile(download_target):
model_bytes = open(download_target, "rb").read()
if hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
return torch.load(io.BytesIO(model_bytes))
else:
warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
Expand All @@ -132,7 +132,7 @@ def _download(url: str, root: str) -> io.BytesIO:
loop.update(len(buffer))

model_bytes = open(download_target, "rb").read()
if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
raise RuntimeError(
"Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
)
Expand Down
5 changes: 3 additions & 2 deletions tests/pipelines/test_pipelines_depth_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import hashlib
import unittest

from huggingface_hub.utils import insecure_hashlib

from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import (
Expand Down Expand Up @@ -44,7 +45,7 @@ def open(*args, **kwargs):


def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes())
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()


Expand Down
4 changes: 2 additions & 2 deletions tests/pipelines/test_pipelines_image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import hashlib
import tempfile
import unittest
from typing import Dict
Expand All @@ -21,6 +20,7 @@
import numpy as np
import requests
from datasets import load_dataset
from huggingface_hub.utils import insecure_hashlib

from transformers import (
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -59,7 +59,7 @@ def open(*args, **kwargs):


def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes())
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10]


Expand Down
4 changes: 2 additions & 2 deletions tests/pipelines/test_pipelines_mask_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import hashlib
import unittest
from typing import Dict

import numpy as np
from huggingface_hub.utils import insecure_hashlib

from transformers import (
MODEL_FOR_MASK_GENERATION_MAPPING,
Expand Down Expand Up @@ -46,7 +46,7 @@ def open(*args, **kwargs):


def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes())
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10]


Expand Down

0 comments on commit fd65aa9

Please sign in to comment.