Merge pull request PrefectHQ#493 from PrefectHQ/default-gpt-4
jlowin authored Jul 24, 2023
2 parents 4ed973c + 6a9e0e4 commit d79fc42
Showing 7 changed files with 18 additions and 25 deletions.
8 changes: 3 additions & 5 deletions docs/src/docs/configuration/openai.md
@@ -1,6 +1,6 @@
 # OpenAI
 
-Marvin supports OpenAI's GPT-3.5 and GPT-4 models, and uses the `gpt-3.5-turbo` model by default. In order to use the OpenAI API, you must provide an API key.
+Marvin supports OpenAI's GPT-3.5 and GPT-4 models, and uses the `openai/gpt-4` model by default. In order to use the OpenAI API, you must provide an API key.
 
 ## Configuration
 
@@ -53,7 +53,5 @@ Once your API key is set, you can use any valid OpenAI model by providing it as
 ```python
 import marvin
 
-marvin.settings.llm_model = 'gpt-4-0613'
-```
-
-Marvin will automatically recognize that the `gpt-3.5*` and `gpt-4*` families of models use the OpenAI provider. To indicate a provider explicitly, prefix the model name with `openai/`. For example: `marvin.settings.llm_model = 'openai/gpt-4'`.
+marvin.settings.llm_model = 'openai/gpt-4-0613'
+```
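The explicit-prefix convention this hunk moves the docs toward can be sketched as a small standalone helper. The `normalize_model` name is hypothetical (not part of Marvin); it only illustrates how a bare model name would be combined with a default provider key:

```python
def normalize_model(model: str, default_provider: str = "openai") -> str:
    """Return a model string in '{provider_key}/{model_name}' form.

    Strings that already contain a provider prefix are returned unchanged;
    bare model names are assumed to belong to the default provider.
    """
    if "/" in model:
        return model  # explicit provider prefix already present
    return f"{default_provider}/{model}"


print(normalize_model("gpt-4-0613"))    # -> openai/gpt-4-0613
print(normalize_model("openai/gpt-4"))  # -> openai/gpt-4
```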
17 changes: 5 additions & 12 deletions docs/src/docs/configuration/settings.md
@@ -8,7 +8,7 @@ All settings are configurable via environment variables like `MARVIN_<setting na
 For example, in an `.env` file or in your shell config file you might have:
 ```shell
 MARVIN_LOG_LEVEL=DEBUG
-MARVIN_LLM_MODEL=gpt-4
+MARVIN_LLM_MODEL=openai/gpt-4
 MARVIN_LLM_TEMPERATURE=0
 ```
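The `MARVIN_<setting name>` mapping shown in this hunk can be sketched in plain Python. The `load_setting` helper below is hypothetical and purely illustrative (Marvin's real settings object is built on pydantic), but it shows how a prefixed environment variable resolves to a typed setting value:

```python
import os

# illustrative values, mirroring the .env example above
os.environ["MARVIN_LLM_MODEL"] = "openai/gpt-4"
os.environ["MARVIN_LLM_TEMPERATURE"] = "0"


def load_setting(name, default=None, cast=str):
    """Read MARVIN_<NAME> from the environment, falling back to a default."""
    raw = os.environ.get(f"MARVIN_{name.upper()}")
    return default if raw is None else cast(raw)


print(load_setting("llm_model"))                    # -> openai/gpt-4
print(load_setting("llm_temperature", cast=float))  # -> 0.0
```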

@@ -20,12 +20,12 @@ For example, to access or change the LLM model used by Marvin at runtime:
 ```python
 import marvin
 
 marvin.settings.llm_model
-# 'gpt-4'
+# 'openai/gpt-4'
 
-marvin.settings.llm_model = 'gpt-3.5-turbo'
+marvin.settings.llm_model = 'openai/gpt-3.5-turbo'
 
 marvin.settings.llm_model
-# 'gpt-3.5-turbo'
+# 'openai/gpt-3.5-turbo'
 ```

## LLM Providers
@@ -34,17 +34,10 @@ Marvin supports multiple LLM providers, including [OpenAI](/src/docs/configurati

 Valid `llm_model` settings are strings with the form `"{provider_key}/{model_name}"`. For example, `"openai/gpt-3.5-turbo"`, `anthropic/claude-2`, or `azure_openai/gpt-4`.
 
-For well-known models, you may provide the model name without a provider key. These models include:
-
-- the `gpt-3.5-*` family from OpenAI
-- the `gpt-4*` family from OpenAI
-- the `claude-*` family from Anthropic
-
-
 
 | Provider | Provider Key | Models | Notes |
 | --- | --- | --- | --- |
-| OpenAI | `openai` | `gpt-3.5-turbo` (default), `gpt-4`, or any other [compatible model](https://platform.openai.com/docs/models/) | Marvin is generally tested and optimized with this provider. |
+| OpenAI | `openai` | `gpt-3.5-turbo`, `gpt-4` (default), or any other [compatible model](https://platform.openai.com/docs/models/) | Marvin is generally tested and optimized with this provider. |
 | Anthropic | `anthropic` | `claude-2`, `claude-instant-1` or any other [compatible model](https://docs.anthropic.com/claude/reference/selecting-a-model) | Available in Marvin 1.1|
 | Azure OpenAI Service | `azure_openai` | `gpt-35-turbo`, `gpt-4`, or any other [compatible model](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models) | The Azure OpenAI Service shares all the same configuration options as the OpenAI models, as well as a few additional ones. Available in Marvin 1.1. |

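The `{provider_key}/{model_name}` form documented in the table above can be validated with a short sketch. The `split_provider` helper and `KNOWN_PROVIDERS` set are hypothetical; the provider keys themselves are the ones listed in the table:

```python
# provider keys from the docs table above (illustrative sketch, not Marvin code)
KNOWN_PROVIDERS = {"openai", "anthropic", "azure_openai"}


def split_provider(llm_model: str) -> tuple[str, str]:
    """Split '{provider_key}/{model_name}' and sanity-check the provider key."""
    provider, _, model_name = llm_model.partition("/")
    if provider not in KNOWN_PROVIDERS:
        raise ValueError(f"Unknown provider key: {provider!r}")
    return provider, model_name


print(split_provider("azure_openai/gpt-4"))  # -> ('azure_openai', 'gpt-4')
```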
4 changes: 2 additions & 2 deletions docs/src/getting_started/quickstart.ipynb
@@ -13,7 +13,7 @@
 "\n",
 "## Configure LLM Provider\n",
 "\n",
-"Marvin is a high-level interface for working with LLMs. In order to use it, you must configure an LLM provider. At this time, Marvin supports OpenAI's GPT-3.5 and GPT-4 models, and Anthropic's Claude 1 and Claude 2 models. The default model is OpenAI's `gpt-3.5-turbo`.\n",
+"Marvin is a high-level interface for working with LLMs. In order to use it, you must configure an LLM provider. At this time, Marvin supports OpenAI's GPT-3.5 and GPT-4 models, Anthropic's Claude 1 and Claude 2 models, and the Azure OpenAI Service. The default model is OpenAI's `gpt-4`.\n",
 "\n",
 "To use the default model, provide an API key:"
 ]
@@ -26,7 +26,7 @@
 "source": [
 "import marvin\n",
 "\n",
-"# to use an OpenAI model (if not specified, defaults to gpt-3.5-turbo)\n",
+"# to use an OpenAI model (if not specified, defaults to gpt-4)\n",
 "marvin.settings.openai.api_key = YOUR_API_KEY"
 ]
 },
2 changes: 1 addition & 1 deletion src/marvin/_framework/_defaults/__init__.py
@@ -3,7 +3,7 @@

 class DefaultSettings(BaseModel):
     default_model_path: str = "marvin.language_models.default"
-    default_model_name: str = "gpt-3.5-turbo"
+    default_model_name: str = "gpt-4"
     default_model_api_key_name: str = "OPENAI_API_KEY"


5 changes: 2 additions & 3 deletions src/marvin/engine/language_models/base.py
@@ -113,14 +113,13 @@ async def run(


 def chat_llm(model: str = None, **kwargs) -> ChatLLM:
+    """Dispatches to all supported LLM providers"""
     if model is None:
         model = marvin.settings.llm_model
 
-    # automatically detect well-known model providers
+    # automatically detect gpt-3.5 and gpt-4 for backwards compatibility
     if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
         model = f"openai/{model}"
-    elif model.startswith("claude-"):
-        model = f"anthropic/{model}"
 
     # extract the provider and model name
     provider, model_name = model.split("/", 1)
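The dispatch logic in this hunk can be reproduced as a standalone sketch (the `resolve_model` name is hypothetical): bare `gpt-3.5-turbo`/`gpt-4` names still fall back to the `openai` provider for backwards compatibility, while every other model, including Claude, now needs an explicit prefix:

```python
def resolve_model(model: str) -> tuple[str, str]:
    """Sketch of the chat_llm dispatch above, minus the Marvin settings lookup."""
    # bare gpt-3.5/gpt-4 names keep working for backwards compatibility
    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
        model = f"openai/{model}"
    # everything else must already be '{provider_key}/{model_name}'
    provider, model_name = model.split("/", 1)
    return provider, model_name


print(resolve_model("gpt-4"))               # -> ('openai', 'gpt-4')
print(resolve_model("anthropic/claude-2"))  # -> ('anthropic', 'claude-2')
```

Note the removed `elif` branch: after this commit a bare `claude-2` would hit the `split("/", 1)` line and fail, which is why the docs hunk above drops the "well-known models" list.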
2 changes: 1 addition & 1 deletion src/marvin/settings.py
@@ -75,7 +75,7 @@ class Settings(MarvinBaseSettings):
     verbose: bool = False
 
     # LLMS
-    llm_model: str = "openai/gpt-3.5-turbo"
+    llm_model: str = "openai/gpt-4"
     llm_max_tokens: int = Field(
         1500, description="The max number of tokens for AI completions"
     )
5 changes: 4 additions & 1 deletion src/marvin/tools/format_response.py
@@ -33,7 +33,10 @@ def __init__(self, type_: Union[type, GenericAlias] = SENTINEL, **kwargs):
             raise ValueError(f"Expected a type or GenericAlias, got {type_}")
 
         # warn if the type is a set or tuple with GPT 3.5
-        if marvin.settings.llm_model.startswith("gpt-3.5"):
+        if (
+            "gpt-3.5" in marvin.settings.llm_model
+            or "gpt-35" in marvin.settings.llm_model
+        ):
             if safe_issubclass(type_, (set, tuple)) or genericalias_contains(
                 type_, (set, tuple)
             ):
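The widened check in this hunk can be isolated as a one-function sketch (the `warns_for_collections` name is hypothetical). A `startswith` test no longer works once model strings carry a provider prefix, and the Azure OpenAI Service spells the model `gpt-35-turbo`, so both substrings must be matched:

```python
def warns_for_collections(llm_model: str) -> bool:
    """True for any GPT-3.5 model string, in both the OpenAI spelling
    ('gpt-3.5-turbo') and the Azure OpenAI spelling ('gpt-35-turbo')."""
    return "gpt-3.5" in llm_model or "gpt-35" in llm_model


print(warns_for_collections("openai/gpt-3.5-turbo"))       # True
print(warns_for_collections("azure_openai/gpt-35-turbo"))  # True
print(warns_for_collections("openai/gpt-4"))               # False
```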
