From d2d87479ef2a344db4210480d655b558b3496acf Mon Sep 17 00:00:00 2001
From: Huiqiang Jiang
Date: Fri, 12 Jul 2024 15:30:30 +0800
Subject: [PATCH] Hotfix(MInference): fix the import warnings, fix the apply_rotary_pos_emb_single, fix phi-3 vs kernel (#30)

Feature(MInference): remove pycuda, support multi-gpu

Co-authored-by: Yucheng Li
Co-authored-by: Chengruidong Zhang
---
 README.md | 2 +-
 experiments/README.md | 1 -
 minference/modules/minference_forward.py | 44 ++++++++++++++++---
 .../ops/block_sparse_flash_attention.py | 5 ++-
 .../ops/pit_sparse_flash_attention_v2.py | 2 +
 minference/patch.py | 2 +
 setup.py | 2 +-
 7 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 443ef9e..7ec3fba 100644
--- a/README.md
+++ b/README.md
@@ -113,7 +113,7 @@ for a local gradio demo