
Commit 11456b4

Prithivi Da authored and committed
Added support for RankT5
1 parent 474d9e2 commit 11456b4

File tree

4 files changed, +26 −13 lines changed

README.md

+8 −2

````diff
@@ -19,6 +19,7 @@ Ultra-lite & Super-fast Python library to add re-ranking to your existing se
 - Below is the list of models supported as of now.
 * ms-marco-TinyBERT-L-2-v2 (default)
 * ms-marco-MiniLM-L-12-v2
+* rank-T5-flan (Best non-cross-encoder reranker)
 * ms-marco-MultiBERT-L-12 (Multi-lingual, [supports 100+ languages](https://github.com/google-research/bert/blob/master/multilingual.md#list-of-languages))
 
 - Why only sleeker models? Reranking is the final leg of larger retrieval pipelines; the idea is to avoid any extra overhead, especially in user-facing scenarios. To that end, we choose models with a really small footprint that don't need any specialised hardware yet offer competitive performance. Feel free to raise issues to add support for new models as you see fit.
@@ -42,7 +43,12 @@ ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="/opt")
 
 or
 
-# Medium (~150MB), slower model with best performance (ranking precision) for 100+ languages including en.
+# Medium (~110MB), slower model with the best zero-shot performance (ranking precision) on out-of-domain data.
+ranker = Ranker(model_name="rank-T5-flan", cache_dir="/opt")
+
+or
+
+# Medium (~150MB), slower model with competitive performance (ranking precision) for 100+ languages (don't use it for English)
 ranker = Ranker(model_name="ms-marco-MultiBERT-L-12", cache_dir="/opt")
 ```
 
@@ -87,7 +93,7 @@ print(results)
 ## Deployment patterns
 #### How to use it in an AWS Lambda function?
 In AWS or other serverless environments the entire VM is read-only, so you might have to create your
-own custom dir and use it for loading the models (and eventually as a cache between warm calls). You can do it during init with the cache_dir parameter.
+own custom dir. You can do so in your Dockerfile and use it for loading the models (and eventually as a cache between warm calls). You can do it during init with the cache_dir parameter.
 
 ```python
 ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="/opt")
````
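The Lambda hunk above is truncated right after the `Ranker` init. As a companion, here is a minimal sketch of the pattern it describes; the `handler` name, the event keys, and the writability of the cache dir are assumptions for illustration, not part of this commit:

```python
# Sketch of the serverless pattern described in the README hunk above, not code
# from this commit. Assumptions: the import path matches flashrank/Ranker.py, and
# /opt is writable because it was baked into the container image (otherwise use /tmp).
from flashrank.Ranker import Ranker

# Initialised at module load, so warm invocations reuse the downloaded model.
ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="/opt")

def handler(event, context):
    # "query" and "passages" are hypothetical event keys for this example.
    return ranker.rerank(event["query"], event["passages"])
```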

flashrank/Config.py

+2 −1

```diff
@@ -4,5 +4,6 @@
 model_file_map = {
     "ms-marco-TinyBERT-L-2-v2": "flashrank-TinyBERT-L-2-v2.onnx",
     "ms-marco-MiniLM-L-12-v2": "flashrank-MiniLM-L-12-v2_Q.onnx",
-    "ms-marco-MultiBERT-L-12": "flashrank-MultiBERT-L12_Q.onnx"
+    "ms-marco-MultiBERT-L-12": "flashrank-MultiBERT-L12_Q.onnx",
+    "rank-T5-flan": "flashrank-rankt5_Q.onnx"
 }
```
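For context, a quick sketch of what the new entry resolves to, assuming `model_file_map` is a plain module-level dict importable from `flashrank.Config` as the file path suggests:

```python
# Assumption: model_file_map is a module-level dict in flashrank/Config.py.
from flashrank.Config import model_file_map

# The new key maps the RankT5 model name to its quantized ONNX export.
print(model_file_map["rank-T5-flan"])  # -> "flashrank-rankt5_Q.onnx"
```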

flashrank/Ranker.py

+15 −9

```diff
@@ -112,22 +112,29 @@ def _get_tokenizer(self, max_length = 512):
         return tokenizer
 
 
-
     def rerank(self, query, passages):
 
         query_passage_pairs = [[query, passage] for passage in passages]
         input_text = self.tokenizer.encode_batch(query_passage_pairs)
         input_ids = np.array([e.ids for e in input_text])
         token_type_ids = np.array([e.type_ids for e in input_text])
         attention_mask = np.array([e.attention_mask for e in input_text])
+
+        use_token_type_ids = token_type_ids is not None and not np.all(token_type_ids == 0)
+
+        if use_token_type_ids:
+            onnx_input = {
+                "input_ids": np.array(input_ids, dtype=np.int64),
+                "attention_mask": np.array(attention_mask, dtype=np.int64),
+                "token_type_ids": np.array(token_type_ids, dtype=np.int64),
+            }
+        else:
+            onnx_input = {
+                "input_ids": np.array(input_ids, dtype=np.int64),
+                "attention_mask": np.array(attention_mask, dtype=np.int64)
+            }
 
 
-        onnx_input = {
-            "input_ids": np.array(input_ids, dtype=np.int64),
-            "attention_mask": np.array(attention_mask, dtype=np.int64),
-            "token_type_ids": np.array(token_type_ids, dtype=np.int64),
-        }
-
         input_data = {k: v for k, v in onnx_input.items()}
 
         outputs = self.session.run(None, input_data)
@@ -149,5 +156,4 @@ def rerank(self, query, passages):
         })
 
 
-        return passage_info
-
+        return passage_info
```
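The new conditional is what lets RankT5 share the same code path as the BERT-style cross-encoders: a T5 ONNX graph declares no `token_type_ids` input (its tokenizer emits all-zero type ids), and onnxruntime raises an error when `session.run` is fed an input the graph does not define, so the all-zero case now drops that tensor while BERT-style models keep receiving all three. A minimal usage sketch follows; the query and passages are made up, and the exact shape of `passage_info` is whatever the library returns:

```python
from flashrank.Ranker import Ranker

# rank-T5-flan takes the new else-branch: only input_ids and attention_mask
# are passed to the ONNX session.
ranker = Ranker(model_name="rank-T5-flan", cache_dir="/opt")

query = "How to speed up LLM inference?"  # illustrative only
passages = [
    "Quantization shrinks models and reduces latency.",
    "The Eiffel Tower is in Paris.",
]

results = ranker.rerank(query, passages)
print(results)
```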

setup.py

+1 −1

```diff
@@ -2,7 +2,7 @@
 
 setup(
     name='FlashRank',
-    version='0.1.4',
+    version='0.1.5',
     packages=find_packages(),
     install_requires=[
         'tokenizers',
```
