Add KorSTS and QuestionPair task
monologg committed Apr 25, 2020
1 parent 8cfd291 commit 98d3210
Showing 26 changed files with 16,754 additions and 23 deletions.
3 changes: 2 additions & 1 deletion finetune/README.md
@@ -51,8 +51,9 @@ $ python3 run_squad.py --task korquad --config_file xlm-roberta.json
 - [NSMC](https://github.com/e9t/nsmc)
 - [Naver NER Dataset](https://github.com/naver/nlp-challenge)
 - [PAWS](https://github.com/google-research-datasets/paws)
-- [KorNLI](https://github.com/kakaobrain/KorNLUDatasets)
+- [KorNLI/KorSTS](https://github.com/kakaobrain/KorNLUDatasets)
 - [KorQuad](https://korquad.github.io/category/1.0_KOR.html)
 - [KoBERT](https://github.com/SKTBrain/KoBERT)
 - [HanBERT](https://github.com/tbai2019/HanBert-54k-N)
 - [HanBert Transformers](https://github.com/monologg/HanBert-Transformers)
+- [Question Pair](https://github.com/songys/Question_pair)
3 changes: 2 additions & 1 deletion finetune/README_EN.md
@@ -52,8 +52,9 @@ In case of `KoELECTRA-Small`, overall performance is better than `DistilKoBERT`.
 - [NSMC](https://github.com/e9t/nsmc)
 - [Naver NER Dataset](https://github.com/naver/nlp-challenge)
 - [PAWS](https://github.com/google-research-datasets/paws)
-- [KorNLI](https://github.com/kakaobrain/KorNLUDatasets)
+- [KorNLI/KorSTS](https://github.com/kakaobrain/KorNLUDatasets)
 - [KorQuad](https://korquad.github.io/category/1.0_KOR.html)
 - [KoBERT](https://github.com/SKTBrain/KoBERT)
 - [HanBERT](https://github.com/tbai2019/HanBert-54k-N)
 - [HanBert Transformers](https://github.com/monologg/HanBert-Transformers)
+- [Question Pair](https://github.com/songys/Question_pair)
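The hunk header above shows the existing invocation pattern (`--task` plus `--config_file`). A hedged sketch of how the newly added tasks would presumably be launched, assuming the sequence-level tasks use a run_seq_cls.py entry point analogous to run_squad.py (the script name is an assumption; it does not appear in this diff):

# Hypothetical commands; run_seq_cls.py is assumed, only run_squad.py appears in this commit.
$ python3 run_seq_cls.py --task korsts --config_file koelectra-base.json
$ python3 run_seq_cls.py --task question-pair --config_file koelectra-base.json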
32 changes: 32 additions & 0 deletions finetune/config/korsts/distilkobert.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "distilkobert",
"model_name_or_path": "monologg/distilkobert",
"output_dir": "distilkobert-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/korsts/hanbert.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "hanbert",
"model_name_or_path": "HanBert-54kN-torch",
"output_dir": "hanbert-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/korsts/kobert.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "kobert",
"model_name_or_path": "monologg/kobert",
"output_dir": "kobert-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/korsts/koelectra-base.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 10,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "koelectra-base",
"model_name_or_path": "monologg/koelectra-base-discriminator",
"output_dir": "koelectra-base-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 3e-5
}
32 changes: 32 additions & 0 deletions finetune/config/korsts/koelectra-small.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "koelectra-small",
"model_name_or_path": "monologg/koelectra-small-discriminator",
"output_dir": "koelectra-small-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/korsts/xlm-roberta.json
@@ -0,0 +1,32 @@
{
"task": "korsts",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "sts-train.tsv",
"dev_file": "sts-dev.tsv",
"test_file": "sts-test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "xlm-roberta",
"model_name_or_path": "xlm-roberta-base",
"output_dir": "xlm-roberta-korsts-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/question-pair/distilkobert.json
@@ -0,0 +1,32 @@
{
"task": "question-pair",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "train.tsv",
"dev_file": "dev.tsv",
"test_file": "test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "distilkobert",
"model_name_or_path": "monologg/distilkobert",
"output_dir": "distilkobert-question-pair-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/question-pair/hanbert.json
@@ -0,0 +1,32 @@
{
"task": "question-pair",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "train.tsv",
"dev_file": "dev.tsv",
"test_file": "test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "hanbert",
"model_name_or_path": "HanBert-54kN-torch",
"output_dir": "hanbert-question-pair-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/question-pair/kobert.json
@@ -0,0 +1,32 @@
{
"task": "question-pair",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "train.tsv",
"dev_file": "dev.tsv",
"test_file": "test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "kobert",
"model_name_or_path": "monologg/kobert",
"output_dir": "kobert-question-pair-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/question-pair/koelectra-base.json
@@ -0,0 +1,32 @@
{
"task": "question-pair",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "train.tsv",
"dev_file": "dev.tsv",
"test_file": "test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "koelectra-base",
"model_name_or_path": "monologg/koelectra-base-discriminator",
"output_dir": "koelectra-base-question-pair-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
32 changes: 32 additions & 0 deletions finetune/config/question-pair/koelectra-small.json
@@ -0,0 +1,32 @@
{
"task": "question-pair",
"data_dir": "data",
"ckpt_dir": "ckpt",
"train_file": "train.tsv",
"dev_file": "dev.tsv",
"test_file": "test.tsv",
"evaluate_test_during_training": false,
"eval_all_checkpoints": true,
"save_optimizer": false,
"do_lower_case": false,
"do_train": true,
"do_eval": true,
"max_seq_len": 72,
"num_train_epochs": 5,
"weight_decay": 0.0,
"gradient_accumulation_steps": 1,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"max_steps": -1,
"max_grad_norm": 1.0,
"no_cuda": false,
"model_type": "koelectra-small",
"model_name_or_path": "monologg/koelectra-small-discriminator",
"output_dir": "koelectra-small-question-pair-ckpt",
"seed": 42,
"train_batch_size": 32,
"eval_batch_size": 64,
"logging_steps": 100,
"save_steps": 100,
"learning_rate": 5e-5
}
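To illustrate how a config like the ones above might be consumed, here is a minimal, hypothetical Python sketch that loads one of the new JSON files into an argparse-style namespace. The loading code and file path are assumptions for illustration only; this commit adds the configs but does not show how the training scripts read them.

import argparse
import json
import os

def load_finetune_config(config_dir, config_file):
    """Hypothetical helper: read one of the JSON configs added in this commit.

    The merge-into-Namespace pattern is an assumption, not taken from the repository.
    """
    with open(os.path.join(config_dir, config_file), encoding="utf-8") as f:
        config = json.load(f)
    return argparse.Namespace(**config)

if __name__ == "__main__":
    # Example: inspect the KorSTS config for KoELECTRA-Base (10 epochs, learning rate 3e-5).
    args = load_finetune_config("finetune/config/korsts", "koelectra-base.json")
    print(args.task, args.model_name_or_path, args.num_train_epochs, args.learning_rate)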