Skip to content

Commit

Permalink
[BUG Fix] Fix resize embeddings (PaddlePaddle#6196)
Browse files Browse the repository at this point in the history
* fix

* update

* feature extraction and image processing support loading from a subfolder on BOS

* update t5

* update
  • Loading branch information
JunnYu authored Jun 19, 2023
1 parent 9c484e6 commit fd0d554
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 20 deletions.
46 changes: 36 additions & 10 deletions paddlenlp/transformers/feature_extraction_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@

import numpy as np
import paddle
from huggingface_hub import hf_hub_download

from .. import __version__
from ..utils.downloader import COMMUNITY_MODEL_PREFIX, get_path_from_url_with_filelock
from ..utils.log import logger
from .tokenizer_utils_base import TensorType
Expand Down Expand Up @@ -241,29 +243,54 @@ def get_feature_extractor_dict(
Returns:
`Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the feature extractor object.
"""
cache_dir = resolve_cache_dir(
pretrained_model_name_or_path=pretrained_model_name_or_path,
from_hf_hub=False, # TODO: from_hf_hub not supported yet
cache_dir=kwargs.pop("cache_dir", None),
)
cache_dir = kwargs.pop("cache_dir", None)
from_hf_hub = kwargs.pop("from_hf_hub", False)
subfolder = kwargs.pop("subfolder", None)
cache_dir = resolve_cache_dir(pretrained_model_name_or_path, from_hf_hub, cache_dir)
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
is_local = os.path.isdir(pretrained_model_name_or_path)
if os.path.isdir(pretrained_model_name_or_path):
resolved_feature_extractor_file = os.path.join(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME)
if subfolder is None:
resolved_feature_extractor_file = os.path.join(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME)
else:
resolved_feature_extractor_file = os.path.join(
pretrained_model_name_or_path, subfolder, FEATURE_EXTRACTOR_NAME
)
elif os.path.isfile(pretrained_model_name_or_path):
resolved_feature_extractor_file = pretrained_model_name_or_path
is_local = True
elif from_hf_hub:
feature_extractor_file = FEATURE_EXTRACTOR_NAME
resolved_feature_extractor_file = hf_hub_download(
repo_id=pretrained_model_name_or_path,
filename=feature_extractor_file,
cache_dir=cache_dir,
subfolder=subfolder,
library_name="PaddleNLP",
library_version=__version__,
)
else:
# from pretrained_feature_extractor_file
if pretrained_model_name_or_path in cls.pretrained_feature_extractor_file:
feature_extractor_file = cls.pretrained_feature_extractor_file[pretrained_model_name_or_path]
else:
# Assuming from community-contributed pretrained models
feature_extractor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME]
)
if subfolder is None:
feature_extractor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME]
)
else:
feature_extractor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, subfolder, FEATURE_EXTRACTOR_NAME]
)
# update cache_dir
cache_dir = os.path.join(cache_dir, subfolder)
try:
resolved_feature_extractor_file = get_path_from_url_with_filelock(feature_extractor_file, cache_dir)
except EnvironmentError:
# Raise any environment error raised by `cached_file`. It will have a helpful error message adapted to
# the original exception.
raise
except Exception:
# For any other exception, we throw a generic error.
raise EnvironmentError(
Expand Down Expand Up @@ -321,7 +348,6 @@ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs):
for key in to_remove:
kwargs.pop(key, None)

logger.info(f"Feature extractor {feature_extractor}")
if return_unused_kwargs:
return feature_extractor, kwargs
else:
Expand Down
25 changes: 17 additions & 8 deletions paddlenlp/transformers/image_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,12 @@ def get_image_processor_dict(
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
is_local = os.path.isdir(pretrained_model_name_or_path)
if os.path.isdir(pretrained_model_name_or_path):
resolved_image_processor_file = os.path.join(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME)
if subfolder is None:
resolved_image_processor_file = os.path.join(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME)
else:
resolved_image_processor_file = os.path.join(
pretrained_model_name_or_path, subfolder, IMAGE_PROCESSOR_NAME
)
elif os.path.isfile(pretrained_model_name_or_path):
resolved_image_processor_file = pretrained_model_name_or_path
is_local = True
Expand All @@ -290,9 +295,16 @@ def get_image_processor_dict(
)
else:
# Assuming from community-contributed pretrained models
image_processor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME]
)
if subfolder is None:
image_processor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME]
)
else:
image_processor_file = "/".join(
[COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, subfolder, IMAGE_PROCESSOR_NAME]
)
# update cache_dir
cache_dir = os.path.join(cache_dir, subfolder)
try:
# Load from local folder or from cache or download from model Hub and cache
resolved_image_processor_file = get_path_from_url_with_filelock(image_processor_file, cache_dir)
Expand Down Expand Up @@ -323,9 +335,7 @@ def get_image_processor_dict(
if is_local:
logger.info(f"loading configuration file {resolved_image_processor_file}")
else:
logger.info(
f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}"
)
logger.info(f"loading configuration file from cache at {resolved_image_processor_file}")

return image_processor_dict, kwargs

Expand Down Expand Up @@ -359,7 +369,6 @@ def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
for key in to_remove:
kwargs.pop(key, None)

logger.info(f"Image processor {image_processor}")
if return_unused_kwargs:
return image_processor, kwargs
else:
Expand Down
11 changes: 10 additions & 1 deletion paddlenlp/transformers/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,16 @@ def _get_resized_embeddings(
)

# Build new embeddings
new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
new_embeddings = nn.Embedding(
new_num_tokens,
old_embedding_dim,
padding_idx=old_embeddings._padding_idx,
sparse=old_embeddings._sparse,
)

# make sure that new_embeddings's dtype is same as the old embeddings' dtype
if new_embeddings.weight.dtype != old_embeddings.weight.dtype:
new_embeddings.to(dtype=old_embeddings.weight.dtype)

# numbers of tokens to copy
n = min(old_num_tokens, new_num_tokens)
Expand Down
5 changes: 4 additions & 1 deletion paddlenlp/transformers/t5/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,10 @@ def __init__(self, config: T5Config):
def forward(self, hidden_states):
    """Pre-norm feed-forward sublayer with a dropout-regularized residual add.

    Args:
        hidden_states: input activations; returned with the feed-forward
            output (after dropout) added residually.

    Returns:
        Tensor of the same shape — and, under mixed precision, the same
        dtype — as ``hidden_states``.
    """
    forwarded_states = self.layer_norm(hidden_states)
    forwarded_states = self.DenseReluDense(forwarded_states)
    # Order matters for mixed precision: hidden_states may be FP16 while
    # self.dropout(forwarded_states) may be FP32. The result dtype follows
    # the left operand (FP16 + FP32 = FP16, FP32 + FP16 = FP32), so putting
    # the dropout output first preserves the input's dtype.
    hidden_states = self.dropout(forwarded_states) + hidden_states
    return hidden_states


Expand Down

0 comments on commit fd0d554

Please sign in to comment.