This is a demo project for a REST API that translates text using pretrained language models. The stack consists of FastAPI and Uvicorn to provide the API service, fastText and its accompanying language identification model for automatic language detection, and Opus-MT language models for the text translation. Currently, only the following language pairs are supported, but additional Opus-MT models can be added.
ru-en
zh-en
git clone ...
cd src
pip install -r requirements-dev.txt
cd src
docker build -t translator .
docker run -p "80:80" translator
To start a local dev server:
cd src/dev-tools
python start-server.py
https://huggingface.co/Helsinki-NLP/opus-mt-ru-en https://huggingface.co/Helsinki-NLP/opus-mt-zh-en
@InProceedings{TiedemannThottingal:EAMT2020,
author = {J{\"o}rg Tiedemann and Santhosh Thottingal},
title = {{OPUS-MT} — {B}uilding open translation services for the {W}orld},
booktitle = {Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)},
year = {2020},
address = {Lisbon, Portugal}
}
https://github.com/facebookresearch/fastText
@article{joulin2016bag,
title={Bag of Tricks for Efficient Text Classification},
author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas},
journal={arXiv preprint arXiv:1607.01759},
year={2016}
}
@article{joulin2016fasttext,
title={FastText.zip: Compressing text classification models},
author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, Herv{\'e} and Mikolov, Tomas},
journal={arXiv preprint arXiv:1612.03651},
year={2016}
}