-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
25 changed files
with
8,618 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
datasets/ | ||
runs/ | ||
outputs/ | ||
model/ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import argparse
import json


def _parse_args(argv=None):
    """Parse CLI arguments; ``argv=None`` falls back to ``sys.argv`` (keeps script behavior)."""
    parser = argparse.ArgumentParser(
        description="Merge nbest step-1 predictions into the original QuAC train file."
    )
    parser.add_argument("--original_train_path", default="./datasets/quac/train.json", type=str, help="path")
    parser.add_argument("--nbest_pred_1_path", default="", type=str, help="path")
    parser.add_argument("--output_step2_train_path", default="", type=str, help="path")
    return parser.parse_args(argv)


def attach_predictions(json_data, predictions):
    """Attach predicted answers to every qa entry, in place.

    Args:
        json_data: SQuAD/QuAC-style dict with a top-level ``data`` list of
            articles, each holding ``paragraphs`` -> ``qas`` entries.
        predictions: mapping from question id to its predicted answers
            (e.g. an nbest prediction list).

    Returns:
        The same ``json_data`` dict, mutated so each qa carries a
        ``predicted_answers`` key.

    Raises:
        KeyError: if a qa id has no entry in ``predictions`` — surfacing a
        mismatch between the train file and the prediction file is deliberate.
    """
    for data in json_data["data"]:
        for paragraph in data["paragraphs"]:
            for qa in paragraph["qas"]:
                qa["predicted_answers"] = predictions[qa["id"]]
    return json_data


def main(argv=None):
    """Read the train file and the prediction file, merge, and write the step-2 train file."""
    args = _parse_args(argv)
    with open(args.original_train_path) as json_file, open(args.nbest_pred_1_path) as predicted_json_file:
        json_data = json.load(json_file)
        json_predicted_data = json.load(predicted_json_file)

    attach_predictions(json_data, json_predicted_data)

    with open(args.output_step2_train_path, "w") as writer:
        writer.write(json.dumps(json_data, indent=4) + "\n")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
# coding=utf-8 | ||
# Copyright 2018 The HuggingFace Inc. team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" Auto Model class. """ | ||
|
||
|
||
import warnings | ||
from collections import OrderedDict | ||
|
||
from transformers.configuration_auto import ( | ||
AutoConfig, | ||
BertConfig, | ||
replace_list_option_in_docstrings, | ||
) | ||
|
||
from transformers.file_utils import( | ||
add_start_docstrings, | ||
) | ||
|
||
from transformers.configuration_utils import PretrainedConfig | ||
|
||
from transformers.utils import logging | ||
|
||
from modeling_bert_ts import ( | ||
BertForQuestionAnswering | ||
) | ||
|
||
|
||
logger = logging.get_logger(__name__)


# Maps a config class to the question-answering model class to instantiate for
# it. This trimmed-down auto-mapping only supports BERT (the project-local
# `BertForQuestionAnswering` from modeling_bert_ts); extend the list to add
# more model types.
MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
    [
        (BertConfig, BertForQuestionAnswering),
    ]
)

# Shared docstring appended (via `add_start_docstrings`) to the
# `from_pretrained` classmethod of the auto class below.
AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
        The model class to instantiate is selected based on the :obj:`model_type` property of the config object (either
        passed as an argument or loaded from :obj:`pretrained_model_name_or_path` if possible), or when it's missing,
        by falling back to using pattern matching on :obj:`pretrained_model_name_or_path`:

        List options

        The model is set in evaluation mode by default using ``model.eval()`` (so for instance, dropout modules are
        deactivated). To train the model, you should first set it back in training mode with ``model.train()``

        Args:
            pretrained_model_name_or_path:
                Can be either:

                    - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g.,
                      ``bert-base-uncased``.
                    - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g.,
                      ``dbmdz/bert-base-german-cased``.
                    - A path to a `directory` containing model weights saved using
                      :func:`~transformers.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``.
                    - A path or url to a `tensorflow index checkpoint file` (e.g, ``./tf_model/model.ckpt.index``). In
                      this case, ``from_tf`` should be set to :obj:`True` and a configuration object should be provided
                      as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in
                      a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args (additional positional arguments, `optional`):
                Will be passed along to the underlying model ``__init__()`` method.
            config (:class:`~transformers.PretrainedConfig`, `optional`):
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can
                be automatically loaded when:

                    - The model is a model provided by the library (loaded with the `shortcut name` string of a
                      pretrained model).
                    - The model was saved using :meth:`~transformers.PreTrainedModel.save_pretrained` and is reloaded
                      by supplying the save directory.
                    - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
                      configuration JSON file named `config.json` is found in the directory.
            state_dict (`Dict[str, torch.Tensor]`, `optional`):
                A state dictionary to use instead of a state dictionary loaded from saved weights file.

                This option can be used if you want to create a model from a pretrained configuration but load your own
                weights. In this case though, you should check if using
                :func:`~transformers.PreTrainedModel.save_pretrained` and
                :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir (:obj:`str`, `optional`):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            from_tf (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Load the model weights from a TensorFlow checkpoint save file (see docstring of
                ``pretrained_model_name_or_path`` argument).
            force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
                file exists.
            proxies (:obj:`Dict[str, str]`, `optional`):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., :obj:`{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
            local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to only look at local files (e.g., not try downloading the model).
            use_cdn(:obj:`bool`, `optional`, defaults to :obj:`True`):
                Whether or not to use Cloudfront (a Content Delivery Network, or CDN) when searching for the model on
                our S3 (faster). Should be set to :obj:`False` for checkpoints larger than 20GB.
            kwargs (additional keyword arguments, `optional`):
                Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
                :obj:`output_attentions=True`). Behaves differently depending on whether a ``config`` is provided or
                automatically loaded:

                    - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the
                      underlying model's ``__init__`` method (we assume all relevant updates to the configuration have
                      already been done)
                    - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class
                      initialization function (:func:`~transformers.PretrainedConfig.from_pretrained`). Each key of
                      ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute
                      with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration
                      attribute will be passed to the underlying model's ``__init__`` function.
"""
|
||
class AutoModelForQuestionAnswering:
    r"""
    This is a generic model class that will be instantiated as one of the model classes of the library---with a
    question answering head---when created with the
    :meth:`~transformers.AutoModelForQuestionAnswering.from_pretrained` class method or the
    :meth:`~transformers.AutoModelForQuestionAnswering.from_config` class method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        # The auto class is a pure factory; direct construction is always an error.
        raise EnvironmentError(
            "AutoModelForQuestionAnswering is designed to be instantiated "
            "using the `AutoModelForQuestionAnswering.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForQuestionAnswering.from_config(config)` methods."
        )

    @classmethod
    @replace_list_option_in_docstrings(MODEL_FOR_QUESTION_ANSWERING_MAPPING, use_model_types=False)
    def from_config(cls, config):
        r"""
        Instantiates one of the model classes of the library---with a question answering head---from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights. It only affects the
            model's configuration. Use :meth:`~transformers.AutoModelForQuestionAnswering.from_pretrained` to load the
            model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                List options

        Examples::

            >>> from transformers import AutoConfig, AutoModelForQuestionAnswering
            >>> # Download configuration from S3 and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForQuestionAnswering.from_config(config)
        """
        # Dispatch on the *exact* config class (no isinstance) so subclasses of a
        # registered config are not silently mapped to the parent's model class.
        if type(config) in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys():
            return MODEL_FOR_QUESTION_ANSWERING_MAPPING[type(config)](config)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()),
            )
        )

    @classmethod
    @replace_list_option_in_docstrings(MODEL_FOR_QUESTION_ANSWERING_MAPPING)
    @add_start_docstrings(
        "Instantiate one of the model classes of the library---with a question answering head---from a "
        "pretrained model.",
        AUTO_MODEL_PRETRAINED_DOCSTRING,
    )
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""
        Examples::

            >>> from transformers import AutoConfig, AutoModelForQuestionAnswering

            >>> # Download model and configuration from S3 and cache.
            >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')

            >>> # Update configuration during loading
            >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)
            >>> model.config.output_attentions
            True

            >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
        """
        # An explicitly passed config wins; otherwise resolve one from the
        # checkpoint name/path, keeping any kwargs the config did not consume.
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        # Same exact-type dispatch as `from_config`, but loading real weights.
        if type(config) in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys():
            return MODEL_FOR_QUESTION_ANSWERING_MAPPING[type(config)].from_pretrained(
                pretrained_model_name_or_path, *model_args, config=config, **kwargs
            )

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()),
            )
        )
Oops, something went wrong.