From 2cc781de8d3ad451a0095185312c17f704541def Mon Sep 17 00:00:00 2001
From: Yang Heng
Date: Sun, 16 Jan 2022 14:42:11 +0000
Subject: [PATCH] update_readme

---
 .gitignore | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.MD  |  43 +++++++++++----
 2 files changed, 188 insertions(+), 10 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..28d67be9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,155 @@
+# dev files
+*.cache
+*.dev.py
+state_dict/
+*.results
+*.tokenizer
+*.model
+*.state_dict
+*.config
+*.args
+*.zip
+*.gz
+*.bin
+*.result.txt
+*.DS_Store
+*.tmp
+*.args.txt
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.pyc
+*.result.json
+.idea/
+
+# Embedding
+glove.840B.300d.txt
+glove.42B.300d.txt
+glove.twitter.27B.txt
+
+# project main files
+release_note.json
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer training_logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.DS_Store
+.DS_Store
+examples/.DS_Store

diff --git a/README.MD b/README.MD
index 4fe12080..1c64f0f1 100644
--- a/README.MD
+++ b/README.MD
@@ -6,28 +6,51 @@
 [![total clones per week](https://raw.githubusercontent.com/yangheng95/ABSADatasets/traffic/total_clones_per_week.svg)](https://github.com/yangheng95/ABSADatasets/tree/traffic#-total-traffic-data-badge)

 ## Contribute (prepare) your dataset for use in PyABSA
-We hope you can share your custom dataset or a available public dataset.
+We hope you can share your custom dataset or an available public dataset.
 If you want to, follow these steps:

 ### Important: Rename your dataset files before using them in PyABSA
-- APC dataset name should be: {id}.dataset name}.{type}.dat , e.g., `996.restaurant.train.dat`, `996.restaurant.test.dat`, `996.restaurant.valid.dat`
-- ATEPC dataset name should be: {id}.{dataset name}.{type}.dat.atepc , `e.g., 996.restaurant.train.dat.atepc`, `996.restaurant.test.dat.atepc`, `996.restaurant.valid.dat.atepc`
+Although the integrated datasets have no ids, it is recommended to assign an id to your dataset.
+When merging your dataset into ABSADatasets, please keep its id unchanged.
+
+- The APC dataset name should be {id}.{dataset name}, and the dataset files should be named {id}.{dataset name}.{type}.dat, e.g.,
+```tree
+datasets
+├── 996.restaurant
+│   ├── 996.restaurant.train.dat    # train_dataset
+│   ├── 996.restaurant.test.dat     # test_dataset
+│   └── 996.restaurant.valid.dat    # valid_dataset; dev sets are not recognized by PyABSA, please rename your dev set to a valid set
+└── others
+```
+
+- The ATEPC dataset files should be named {id}.{dataset name}.{type}.dat.atepc, e.g.,
+```tree
+datasets
+├── 996.restaurant
+│   ├── 996.restaurant.train.dat.atepc    # train_dataset
+│   ├── 996.restaurant.test.dat.atepc     # test_dataset
+│   └── 996.restaurant.valid.dat.atepc    # valid_dataset; dev sets are not recognized by PyABSA, please rename your dev set to a valid set
+└── others
+```

-Then, use the id to loacte your dataset, e.g., 
+Then, use the id to locate your dataset, e.g.,

 ```python3
 from pyabsa.functional import APCConfigManager
+from pyabsa.functional import Trainer
+from autocuda import auto_cuda

 config = APCConfigManager.get_apc_config_english()

 # APC task
- Trainer(config=config,
-         dataset='996.restaurant',  # train set and test set will be automatically detected
-         checkpoint_save_mode=1,
-         auto_device=device  # automatic choose CUDA or CPU
-         )
+Trainer(config=config,
+        dataset='996.restaurant',  # train set and test set will be automatically detected
+        checkpoint_save_mode=1,
+        auto_device=auto_cuda()  # automatically choose CUDA or CPU
+        )
 ```

-It will avoid some potential probelm while PyABSA detects the dataset.
+It will avoid potential problems (e.g., duplicated dataset names) while PyABSA detects the dataset.

 ### Dataset Processing
 - Format your APC dataset according to our dataset format. (**Recommended. Once you have finished this step, we can help you finish the other steps**)
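Before opening a pull request, contributors may want to sanity-check their file layout against the naming scheme described in the README changes above. The following is a minimal sketch of such a check, not an official PyABSA utility: the `check_dataset_folder` helper and the `datasets/996.restaurant` path are hypothetical, and the regular expression simply encodes the `{id}.{dataset name}.{type}.dat(.atepc)` convention from this patch.

```python3
# A minimal sketch for checking dataset file names against the
# {id}.{dataset name}.{type}.dat(.atepc) convention described above.
# The folder path used below is a hypothetical example.
import re
from pathlib import Path

# id: digits; dataset name: word characters or hyphens;
# type: train/test/valid (dev sets are not recognized by PyABSA)
PATTERN = re.compile(r"^\d+\.[\w\-]+\.(train|test|valid)\.dat(\.atepc)?$")

def check_dataset_folder(folder: str) -> None:
    """Print a naming verdict for every file in the dataset folder."""
    for path in sorted(Path(folder).iterdir()):
        if PATTERN.match(path.name):
            print(f"ok         {path.name}")
        elif ".dev." in path.name:
            print(f"rename     {path.name}  (use 'valid' instead of 'dev')")
        else:
            print(f"bad name   {path.name}")

check_dataset_folder("datasets/996.restaurant")
```

Under this sketch, a file such as `996.restaurant.valid.dat.atepc` passes, while `996.restaurant.dev.dat` is flagged for renaming, matching the note above that PyABSA recognizes valid sets but not dev sets.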