Implementation of H-Transformer-1D, a Transformer using hierarchical attention for sequence learning with subquadratic cost.
% arXiv preprint. The braces around the model name in the title keep its
% capitalisation intact under styles that sentence-case titles.
@misc{zhu2021htransformer1d,
  author        = {Zhu, Zhenhai and Soricut, Radu},
  title         = {{H-Transformer-1D}: Fast One-Dimensional Hierarchical Attention for Sequences},
  year          = {2021},
  eprint        = {2107.11906},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}