Merge pull request explosion#416 from explosion/develop
Synch `main` with `develop`
rmitsch authored Jan 19, 2024
2 parents 0cb81d2 + 377b1d4 commit 96237c7
Showing 141 changed files with 6,777 additions and 1,157 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/test.yml
@@ -24,8 +24,6 @@ jobs:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.11"]
include:
- os: ubuntu-20.04
python_version: "3.6"
- os: windows-latest
python_version: "3.7"
- os: macos-latest
4 changes: 2 additions & 2 deletions .github/workflows/test_gpu.yml
@@ -1,8 +1,8 @@
name: GPU tests

on:
schedule:
- cron: "0 0 * * *"
# schedule:
# - cron: "0 0 * * *"
issue_comment:
types: [created]
workflow_dispatch:
15 changes: 12 additions & 3 deletions README.md
@@ -1,6 +1,11 @@
<a href="https://explosion.ai"><img src="https://explosion.ai/assets/img/logo.svg" width="125" height="125" align="right" /></a>
<a href="https://explosion.ai"><img src="assets/logo.png" width="125" height="125" align="left" style="margin-right:30px" /></a>

# spacy-llm: Integrating LLMs into structured NLP pipelines
<h1 align="center">
<span style="font: bold 38pt'Courier New';">spacy-llm</span>
<br>Structured NLP with LLMs
</h1>
<br><br>

[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/explosion/spacy-llm/test.yml?branch=main)](https://github.com/explosion/spacy-llm/actions/workflows/test.yml)
[![pypi Version](https://img.shields.io/pypi/v/spacy-llm.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/spacy-llm/)
@@ -16,7 +21,8 @@ This package integrates Large Language Models (LLMs) into [spaCy](https://spacy.
- **[OpenAI](https://platform.openai.com/docs/api-reference/)**
- **[Cohere](https://docs.cohere.com/reference/generate)**
- **[Anthropic](https://docs.anthropic.com/claude/reference/)**
- **[PaLM](https://ai.google/discover/palm2/)**
- **[Google PaLM](https://ai.google/discover/palm2/)**
- **[Microsoft Azure AI](https://azure.microsoft.com/en-us/solutions/ai)**
- Supports open-source LLMs hosted on Hugging Face 🤗:
- **[Falcon](https://huggingface.co/tiiuae)**
- **[Dolly](https://huggingface.co/databricks)**
@@ -33,10 +39,13 @@ This package integrates Large Language Models (LLMs) into [spaCy](https://spacy.
- Sentiment analysis
- Span categorization
- Summarization
- Entity linking
- Translation
- Raw prompt execution for maximum flexibility
- Soon:
- Entity linking
- Semantic role labeling
- Easy implementation of **your own functions** via [spaCy's registry](https://spacy.io/api/top-level#registry) for custom prompting, parsing and model integrations
- Map-reduce approach for splitting prompts too long for LLM's context window and fusing the results back together

## 🧠 Motivation

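To ground the README feature list above: a minimal end-to-end pipeline might look like the sketch below. The registry names (`spacy.NER.v3`, `spacy.GPT-3-5.v2`) and the label values are assumptions chosen for illustration and depend on the installed spacy-llm version; an OpenAI API key is assumed to be set in the environment.

```python
# Hedged sketch of a spacy-llm pipeline; registry names and labels are illustrative
# assumptions, and OPENAI_API_KEY is expected to be set in the environment.
import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NER.v3", "labels": ["PERSON", "LOCATION"]},
        "model": {"@llm_models": "spacy.GPT-3-5.v2"},
    },
)

doc = nlp("Jack and Jill went up the hill in Kathmandu.")
print([(ent.text, ent.label_) for ent in doc.ents])
```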
Binary file added assets/logo.png
3 changes: 1 addition & 2 deletions setup.cfg
@@ -16,7 +16,6 @@ classifiers =
Operating System :: MacOS :: MacOS X
Operating System :: Microsoft :: Windows
Programming Language :: Python :: 3
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
@@ -30,7 +29,7 @@ project_urls =
[options]
zip_safe = false
include_package_data = true
python_requires = >=3.6
python_requires = >=3.7
install_requires =
spacy>=3.5,<4.0
jinja2
6 changes: 3 additions & 3 deletions spacy_llm/cache.py
@@ -8,7 +8,7 @@
from spacy.vocab import Vocab

from .registry import registry
from .ty import LLMTask, PromptTemplateProvider
from .ty import PromptTemplateProvider, ShardingLLMTask


@registry.llm_misc("spacy.BatchCache.v1")
@@ -68,11 +68,11 @@ def __init__(

self._init_cache_dir()

def initialize(self, vocab: Vocab, task: LLMTask) -> None:
def initialize(self, vocab: Vocab, task: ShardingLLMTask) -> None:
"""
Initialize cache with data not available at construction time.
vocab (Vocab): Vocab object.
task (LLMTask): Task.
task (ShardingLLMTask): Task.
"""
self._vocab = vocab
if isinstance(task, PromptTemplateProvider):
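For context on how `spacy.BatchCache.v1` is wired in: the cache is typically configured on the `llm` component itself. The parameter names below (`path`, `batch_size`, `max_batches_in_mem`) are assumptions for illustration and should be checked against the installed version.

```python
# Hedged sketch: enabling the batch cache on an llm component. Task/model/cache
# registry entries and parameter names are illustrative assumptions.
import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.Summarization.v1"},
        "model": {"@llm_models": "spacy.GPT-3-5.v2"},
        "cache": {
            "@llm_misc": "spacy.BatchCache.v1",
            "path": "local-llm-cache",
            "batch_size": 64,
            "max_batches_in_mem": 4,
        },
    },
)
```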
2 changes: 2 additions & 0 deletions spacy_llm/compat.py
@@ -57,5 +57,7 @@
if PYDANTIC_V2:
from pydantic.v1 import BaseModel, ExtraError, ValidationError # noqa: F401
from pydantic.v1 import validator
from pydantic.v1.generics import GenericModel # noqa: F401
else:
from pydantic import BaseModel, ExtraError, ValidationError, validator # noqa: F401
from pydantic.generics import GenericModel # noqa: F401
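The `GenericModel` re-export keeps downstream code agnostic to the installed pydantic major version. A small sketch of the intended usage pattern; the `Batch` model here is hypothetical and only for illustration.

```python
# Sketch: consuming the compat shim so generic models work under pydantic v1 and v2.
from typing import Generic, List, TypeVar

from spacy_llm.compat import GenericModel

ItemT = TypeVar("ItemT")


class Batch(GenericModel, Generic[ItemT]):
    """Hypothetical container, used only to illustrate the shim."""

    items: List[ItemT]


print(Batch[int](items=[1, 2, 3]).items)  # -> [1, 2, 3]
```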
17 changes: 13 additions & 4 deletions spacy_llm/models/hf/base.py
@@ -17,16 +17,18 @@ def __init__(
name: str,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
"""Initializes HF model instance.
query (Callable[[Any, Iterable[Any]], Iterable[Any]): Callable executing LLM prompts when
supplied with the `integration` object.
name (str): Name of HF model to load (without account name).
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
inference_config (Dict[Any, Any]): HF config for model run.
context_length (Optional[int]): Context length for this model. Necessary for sharding.
"""
self._name = name if self.hf_account in name else f"{self.hf_account}/{name}"
self._context_length = context_length
default_cfg_init, default_cfg_run = self.compile_default_configs()
self._config_init, self._config_run = default_cfg_init, default_cfg_run

@@ -73,10 +75,10 @@ def __init__(
self._model = self.init_model()

@abc.abstractmethod
def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]:
def __call__(self, prompts: Iterable[Iterable[Any]]) -> Iterable[Iterable[Any]]:
"""Executes prompts on specified API.
prompts (Iterable[Any]): Prompts to execute.
RETURNS (Iterable[Any]): API responses.
prompts (Iterable[Iterable[Any]]): Prompts to execute per doc.
RETURNS (Iterable[Iterable[Any]]): API responses per doc.
"""

def _check_model(self) -> None:
@@ -93,6 +95,13 @@ def get_model_names(cls) -> Tuple[str, ...]:
"""
return tuple(str(arg) for arg in cls.MODEL_NAMES.__args__) # type: ignore[attr-defined]

@property
def context_length(self) -> Optional[int]:
"""Returns context length in number of tokens for this model.
RETURNS (Optional[int]): Max. number of tokens allowed in prompt for the current model.
"""
return self._context_length

@property
@abc.abstractmethod
def hf_account(self) -> str:
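The new `context_length` property is what the sharding / map-reduce machinery consults when deciding whether a prompt fits into the model's window. A rough, self-contained sketch of that decision; the whitespace token count is only a stand-in for a real tokenizer.

```python
# Sketch: deciding whether a prompt must be sharded before being sent to the model.
# Splitting on whitespace is a crude stand-in for a proper tokenizer.
from typing import Optional


def needs_sharding(prompt: str, context_length: Optional[int]) -> bool:
    if context_length is None:
        return False  # unknown context window: leave the prompt untouched
    return len(prompt.split()) > context_length


print(needs_sharding("a reasonably short prompt", 2048))  # -> False
```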
18 changes: 12 additions & 6 deletions spacy_llm/models/hf/dolly.py
@@ -18,14 +18,18 @@ def init_model(self) -> Any:
model=self._name, return_full_text=False, **self._config_init
)

def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[override]
def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
"""Queries Dolly HF model.
pipeline (transformers.pipeline): Transformers pipeline to query.
prompts (Iterable[str]): Prompts to query Dolly model with.
RETURNS (Iterable[str]): Prompt responses.
prompts (Iterable[Iterable[str]]): Prompts per doc to query Dolly model with.
RETURNS (Iterable[Iterable[str]]): Prompt responses per doc.
"""
return [
self._model(pr, **self._config_run)[0]["generated_text"] for pr in prompts
[
self._model(pr, **self._config_run)[0]["generated_text"]
for pr in prompts_for_doc
]
for prompts_for_doc in prompts
]

@property
@@ -52,12 +56,14 @@ def dolly_hf(
name: Dolly.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[str]], Iterable[str]]:
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Dolly instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Dolly model. Has to be one of Dolly.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Dolly instance that can execute a set of prompts and return
the raw responses.
"""
return Dolly(name=name, config_init=config_init, config_run=config_run)
return Dolly(
name=name, config_init=config_init, config_run=config_run, context_length=2048
)
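The signature change above means HF model callables now receive prompts grouped per document and return responses grouped the same way. A toy, self-contained sketch of that nested pattern, with `str.upper` standing in for the actual Dolly pipeline:

```python
# Sketch: the "prompts per doc -> responses per doc" nesting used by Dolly.__call__.
from typing import Callable, Iterable, List


def run_per_doc(
    model: Callable[[str], str], prompts: Iterable[Iterable[str]]
) -> List[List[str]]:
    return [[model(p) for p in prompts_for_doc] for prompts_for_doc in prompts]


print(run_per_doc(str.upper, [["first prompt", "second prompt"], ["only prompt"]]))
# -> [['FIRST PROMPT', 'SECOND PROMPT'], ['ONLY PROMPT']]
```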
25 changes: 19 additions & 6 deletions spacy_llm/models/hf/falcon.py
@@ -17,9 +17,15 @@ def __init__(
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)
self._config_run["pad_token_id"] = self._tokenizer.pad_token_id
@@ -45,10 +51,15 @@ def init_model(self) -> Any:
def hf_account(self) -> str:
return "tiiuae"

def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[override]
def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
return [
self._model(pr, generation_config=self._hf_config_run)[0]["generated_text"]
for pr in prompts
[
self._model(pr, generation_config=self._hf_config_run)[0][
"generated_text"
]
for pr in prompts_for_doc
]
for prompts_for_doc in prompts
]

@staticmethod
Expand All @@ -68,12 +79,14 @@ def falcon_hf(
name: Falcon.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[str]], Iterable[str]]:
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Falcon instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
the raw responses.
"""
return Falcon(name=name, config_init=config_init, config_run=config_run)
return Falcon(
name=name, config_init=config_init, config_run=config_run, context_length=2048
)
25 changes: 19 additions & 6 deletions spacy_llm/models/hf/llama2.py
@@ -17,8 +17,14 @@ def __init__(
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
super().__init__(name=name, config_init=config_init, config_run=config_run)
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)
# Instantiate GenerationConfig object from config dict.
self._hf_config_run = transformers.GenerationConfig.from_pretrained(
self._name,
@@ -39,10 +45,15 @@ def init_model(self) -> Any:
def hf_account(self) -> str:
return "meta-llama"

def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[override]
def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
return [
self._model(pr, generation_config=self._hf_config_run)[0]["generated_text"]
for pr in prompts
[
self._model(pr, generation_config=self._hf_config_run)[0][
"generated_text"
]
for pr in prompts_for_doc
]
for prompts_for_doc in prompts
]

@staticmethod
@@ -55,12 +66,14 @@ def llama2_hf(
name: Llama2.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[str]], Iterable[str]]:
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Llama 2 instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Llama 2 model. Has to be one of Llama2.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Llama2 instance that can execute a set of prompts and return
the raw responses.
"""
return Llama2(name=name, config_init=config_init, config_run=config_run)
return Llama2(
name=name, config_init=config_init, config_run=config_run, context_length=4096
)
65 changes: 41 additions & 24 deletions spacy_llm/models/hf/mistral.py
@@ -1,4 +1,4 @@
from typing import Any, Callable, Dict, Iterable, Optional
from typing import Any, Callable, Dict, Iterable, List, Optional

from confection import SimpleFrozenDict

@@ -15,10 +15,16 @@ def __init__(
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_instruct = "instruct" in name
super().__init__(name=name, config_init=config_init, config_run=config_run)
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)

@@ -48,43 +54,54 @@ def init_model(self) -> Any:
def hf_account(self) -> str:
return "mistralai"

def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[override]
def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
assert hasattr(self._model, "generate")
assert hasattr(self._tokenizer, "batch_decode")
prompts = list(prompts)

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"<s>[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts
]
tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]

return [
self._tokenizer.decode(
self._model.generate(
input_ids=tok_ii, generation_config=self._hf_config_run
)[:, tok_ii.shape[1] :][0],
skip_special_tokens=True,
responses: List[List[str]] = []

for prompts_for_doc in prompts:
prompts_for_doc = list(prompts_for_doc)

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"<s>[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

responses.append(
[
self._tokenizer.decode(
self._model.generate(
input_ids=tok_ii, generation_config=self._hf_config_run
)[:, tok_ii.shape[1] :][0],
skip_special_tokens=True,
)
for tok_ii in tokenized_input_ids
]
)
for tok_ii in tokenized_input_ids
]

return responses


@registry.llm_models("spacy.Mistral.v1")
def mistral_hf(
name: Mistral.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[str]], Iterable[str]]:
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mistral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
the raw responses.
"""
return Mistral(name=name, config_init=config_init, config_run=config_run)
return Mistral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
)
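The Mistral changes above also wrap prompts for instruct-tuned checkpoints before tokenizing. That formatting step in isolation, as a minimal sketch:

```python
# Sketch: the [INST] wrapping Mistral.__call__ applies for instruct-tuned checkpoints.
def format_prompt(prompt: str, is_instruct: bool) -> str:
    return f"<s>[INST] {prompt} [/INST]" if is_instruct else prompt


print(format_prompt("Summarize the document.", is_instruct=True))
# -> <s>[INST] Summarize the document. [/INST]
```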