Tokenizer pipe() method for duck-type compatibility with spaCy Tokenizer (#8)

Tokenizer pipe() method for duck-type compatibility with spaCy Tokenizer
explosion · Apr 26, 2019 · b972009 · b972009
2 parents fc1c1df + bc549b3
commit b972009
Showing 1 changed file with 9 additions and 0 deletions.
diff --git a/spacy_stanfordnlp/language.py b/spacy_stanfordnlp/language.py
@@ -178,6 +178,15 @@ def __call__(self, text):
             doc.is_parsed = True
         return doc
 
+    def pipe(self, texts):
+        """Tokenize a stream of texts lazily, calling self(text) on each one.
+
+        texts: An iterable of unicode texts.
+        YIELDS (Doc): The Doc returned by self(text) for each text, in order.
+        """
+        for text in texts:
+            yield self(text)
+
     def get_tokens_with_heads(self, snlp_doc):
         """Flatten the tokens in the StanfordNLP Doc and extract the token indices
         of the sentence start tokens to set is_sent_start.