[v0.0.2] compatibility fix
* `litellm` compatibility fix (now requires litellm>=1.41.0)
* `instructor` compatibility fix
* minor changes (e.g., record token `usage` on `CompletionResponse`, new `configs_update` and `llm_raw_usage` display settings)
dhh1995 committed Aug 13, 2024
1 parent 3fb1d00 commit 05fe92a
Showing 10 changed files with 158 additions and 83 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -144,6 +144,9 @@ For a more comprehensive tutorial, please refer to the [tutorial](https://appl-t
### Cookbook
For more detailed usage and examples, please refer to the [cookbook](https://appl-team.github.io/appl/cookbook).

APPL can be used to reproduce some popular LM-based applications easily, such as:
* [Wordware's TwitterPersonality](https://twitter.wordware.ai/)[[APPL implementation](https://github.com/appl-team/TwitterPersonality)]: analyzes your tweets to determine your Twitter personality.

## Citation and Acknowledgment
If you find APPL helpful, please consider citing our paper:
```bibtex
2 changes: 1 addition & 1 deletion docs/cookbook/index.md
@@ -16,4 +16,4 @@ Welcome to the APPL Cookbook! This cookbook provides a collection of recipes to
1. [Chat with Codes](./chat_with_codes.md)

## Coding Prompts
1. [Writing Long Prompts in Modules](./long_prompts.md)
1. [Writing Long Prompts in Modules](./long_prompt.md)
163 changes: 112 additions & 51 deletions pdm.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

[project]
name = "applang"
version = "0.0.1"
version = "0.0.2"
description = "A Prompt Programming Language"
authors = [
{ name = "Honghua Dong", email = "[email protected]" },
@@ -23,7 +23,7 @@ dependencies = [
"PyYAML>=6.0.1",
"toml>=0.10.2",
"termcolor>=2.4.0",
"litellm>=1.29.5",
"litellm>=1.41.0",
"rich>=13.7.1",
"tiktoken>=0.6.0",
"langsmith>=0.1.25",
3 changes: 2 additions & 1 deletion src/appl/__init__.py
@@ -129,7 +129,8 @@ def init(
override_configs = load_config(config_file)
logger.info("Loaded configs from {}".format(config_file))
configs.update(override_configs)
logger.info(f"update configs:\n{yaml.dump(override_configs.to_dict())}")
if configs.getattrs("settings.logging.display.configs_update"):
logger.info(f"update configs:\n{yaml.dump(override_configs.to_dict())}")
else:
caller_basename, dotenvs, appl_config_files = "appl", [], []
logger.error(
36 changes: 14 additions & 22 deletions src/appl/core/response.py
@@ -1,28 +1,11 @@
import time

from litellm import CustomStreamWrapper, completion_cost, stream_chunk_builder
from litellm.exceptions import NotFoundError
from openai import Stream
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta,
ChoiceDeltaToolCallFunction,
)
from pydantic import model_validator
from tqdm import tqdm

from litellm import (
CustomStreamWrapper,
ModelResponse,
completion_cost,
stream_chunk_builder,
)
from litellm.utils import Delta, Function

from ..core.types import *
from .config import configs
from .tool import ToolCall
from .types import *
@@ -39,6 +22,10 @@ class CompletionResponse(BaseModel):
"""The raw response from the model."""
cost: Optional[float] = Field(None, description="The cost of the completion")
"""The cost of the completion."""
usage: Optional[CompletionUsage] = Field(
None, description="The usage of the completion"
)
"""The usage of the completion."""
chunks: List[Union[ModelResponse, ChatCompletionChunk]] = Field(
[], description="The chunks of the response when streaming"
)
@@ -81,7 +68,7 @@ def _post_init(self) -> "CompletionResponse":
def complete_response(self) -> Union[ModelResponse, ChatCompletion]:
"""The complete response from the model. This will block until the response is finished."""
if self.is_finished:
return self._complete_response
return self._complete_response # type: ignore
self.streaming() # ? when we should set display to False?
assert self.is_finished, "Response should be finished after streaming"
return self._complete_response # type: ignore
@@ -194,11 +181,16 @@ def _finish(self, response: Any) -> None:
return
self.is_finished = True
self._complete_response = response
self.usage = getattr(response, "usage", None)
try:
self.cost = completion_cost(response)
except NotFoundError:
pass
# parse the message and tool calls
if isinstance(response, (ModelResponse, ChatCompletion)):
message = response.choices[0].message
if getattr(message, "tool_calls", None):
for call in message.tool_calls:
if tool_calls := getattr(message, "tool_calls", None):
for call in tool_calls:
self.tool_calls.append(ToolCall.from_openai_tool_call(call))
elif message.content is not None:
self.message = message.content
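To make the new field concrete, here is a small consumer-side sketch (not part of the commit) that reads the attributes touched above; the `CompletionResponse` field names come from this diff, everything else is illustrative:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from appl.core.response import CompletionResponse  # import path taken from this diff


def summarize(response: "CompletionResponse") -> str:
    """Build a one-line summary of a finished completion (illustrative helper)."""
    parts = [f"finished={response.is_finished}"]
    if response.usage is not None:   # new in this commit: provider-reported CompletionUsage
        parts.append(f"usage={response.usage}")
    if response.cost is not None:    # stays None when litellm has no pricing for the model
        parts.append(f"cost=${response.cost:.6f}")
    if response.tool_calls:          # parsed via ToolCall.from_openai_tool_call in _finish
        parts.append(f"tool_calls={len(response.tool_calls)}")
    elif response.message is not None:
        parts.append(f"message={response.message[:40]!r}")
    return ", ".join(parts)
```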
1 change: 1 addition & 0 deletions src/appl/core/types/__init__.py
@@ -1,6 +1,7 @@
from .basic import *
from .content import *
from .custom import *
from .deps import *
from .futures import *
from .role import *

14 changes: 14 additions & 0 deletions src/appl/core/types/deps.py
@@ -0,0 +1,14 @@
"""Import types from dependencies"""

from litellm.types.utils import Delta, Function, ModelResponse
from openai.types import CompletionUsage
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta,
ChoiceDeltaToolCallFunction,
)
2 changes: 2 additions & 0 deletions src/appl/default_configs.yaml
@@ -17,8 +17,10 @@ settings:
log_level: null # default to use the same log level as stdout
display:
configs: false # Display the configurations
configs_update: false # Display the updates of the configurations
llm_raw_call_args: false # Display the raw args for the llm calls
llm_raw_response: false # Display the raw response of the llm calls
llm_raw_usage: false # Display the raw usage of the llm calls
llm_call_args: true # Display the args for the llm calls
llm_response: true # Display the response of the llm calls
llm_cache: false # Display the cache info
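Both new flags default to `false`. Below is a minimal sketch of an override config that turns them on; the file name `appl.yaml` and the assumption that overrides are deep-merged into the defaults (via `configs.update` in `init()`) are mine, not part of the commit:

```yaml
# Hypothetical project-level override; only the keys being changed are listed.
settings:
  logging:
    display:
      configs_update: true   # log applied config overrides (added in v0.0.2)
      llm_raw_usage: true    # log raw token usage of each LLM call (added in v0.0.2)
```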
13 changes: 7 additions & 6 deletions src/appl/servers/api.py
@@ -67,6 +67,7 @@ def chat_completion(**kwargs: Any) -> CompletionResponse:

log_llm_call_args = configs.getattrs("settings.logging.display.llm_raw_call_args")
log_llm_response = configs.getattrs("settings.logging.display.llm_raw_response")
log_llm_usage = configs.getattrs("settings.logging.display.llm_raw_usage")
log_llm_cache = configs.getattrs("settings.logging.display.llm_cache")
if log_llm_call_args:
logger.info(f"Call completion [{gen_id}] with args: {kwargs}")
@@ -94,12 +95,7 @@ def wrapped(**inner_kwargs: Any) -> Tuple[Any, bool]:

def post_completion(response: CompletionResponse) -> None:
raw_response = response.complete_response
cost = 0.0
if not use_cache:
try:
cost = completion_cost(raw_response)
except litellm.exceptions.NotFoundError:
pass
cost = 0.0 if use_cache else response.cost
response.cost = cost # update the cost
add_to_trace(
CompletionResponseEvent(
@@ -108,6 +104,8 @@ def post_completion(response: CompletionResponse) -> None:
)
if log_llm_response:
logger.info(f"Completion [{gen_id}] response: {response}")
if log_llm_usage and response.usage is not None:
logger.info(f"Completion [{gen_id}] usage: {response.usage}")

return CompletionResponse(
raw_response=raw_response, post_finish_callbacks=[post_completion]
@@ -186,6 +184,9 @@ def wrapper(**inner_kwargs: Any) -> CompletionResponse:
# fill in the response_model and response_obj
response.response_model = response_model
response.response_obj = results
# TODO?: update the cost for multiple retries
# instructor has updated the total usage for retries
# ?? response.cost = completion_cost({"usage": response.usage})
else:
response = chat_completion(**kwargs)
return response
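Putting the pieces together, here is a hedged end-to-end sketch of where the new `usage` value surfaces. That `chat_completion` accepts litellm-style keyword arguments is an assumption based on the wrapper above, the model name is a placeholder, and the exact `init()` signature may differ:

```python
import appl
from appl.servers.api import chat_completion  # module path taken from this diff

appl.init()  # load default_configs.yaml plus any user overrides

response = chat_completion(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "Say hi in one word."}],
)
print(response.cost)   # set in _finish via litellm.completion_cost (None if pricing is unknown)
print(response.usage)  # new in this commit; also logged when llm_raw_usage is enabled
```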
