[v0.0.2] compatibility fix
* `litellm` compatibility fix (now requires litellm>=1.41.0)
* `instructor` compatibility fix
* minor changes (e.g., record token `usage` on `CompletionResponse`, new `configs_update` and `llm_raw_usage` display settings)
dhh1995 committed Aug 13, 2024
1 parent 3fb1d00 commit 05fe92a
Showing 10 changed files with 158 additions and 83 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -144,6 +144,9 @@ For a more comprehensive tutorial, please refer to the [tutorial](https://appl-t
### Cookbook
For more detailed usage and examples, please refer to the [cookbook](https://appl-team.github.io/appl/cookbook).

APPL can be used to reproduce some popular LM-based applications easily, such as:
* [Wordware's TwitterPersonality](https://twitter.wordware.ai/)[[APPL implementation](https://github.com/appl-team/TwitterPersonality)]: analyzes your tweets to determine your Twitter personality.

## Citation and Acknowledgment
If you find APPL helpful, please consider citing our paper:
```bibtex
2 changes: 1 addition & 1 deletion docs/cookbook/index.md
@@ -16,4 +16,4 @@ Welcome to the APPL Cookbook! This cookbook provides a collection of recipes to
1. [Chat with Codes](./chat_with_codes.md)

## Coding Prompts
1. [Writing Long Prompts in Modules](./long_prompts.md)
1. [Writing Long Prompts in Modules](./long_prompt.md)
163 changes: 112 additions & 51 deletions pdm.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

[project]
name = "applang"
version = "0.0.1"
version = "0.0.2"
description = "A Prompt Programming Language"
authors = [
{ name = "Honghua Dong", email = "[email protected]" },
@@ -23,7 +23,7 @@ dependencies = [
"PyYAML>=6.0.1",
"toml>=0.10.2",
"termcolor>=2.4.0",
"litellm>=1.29.5",
"litellm>=1.41.0",
"rich>=13.7.1",
"tiktoken>=0.6.0",
"langsmith>=0.1.25",
3 changes: 2 additions & 1 deletion src/appl/__init__.py
@@ -129,7 +129,8 @@ def init(
override_configs = load_config(config_file)
logger.info("Loaded configs from {}".format(config_file))
configs.update(override_configs)
logger.info(f"update configs:\n{yaml.dump(override_configs.to_dict())}")
if configs.getattrs("settings.logging.display.configs_update"):
logger.info(f"update configs:\n{yaml.dump(override_configs.to_dict())}")
else:
caller_basename, dotenvs, appl_config_files = "appl", [], []
logger.error(
36 changes: 14 additions & 22 deletions src/appl/core/response.py
@@ -1,28 +1,11 @@
import time

from litellm import CustomStreamWrapper, completion_cost, stream_chunk_builder
from litellm.exceptions import NotFoundError
from openai import Stream
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta,
ChoiceDeltaToolCallFunction,
)
from pydantic import model_validator
from tqdm import tqdm

from litellm import (
CustomStreamWrapper,
ModelResponse,
completion_cost,
stream_chunk_builder,
)
from litellm.utils import Delta, Function

from ..core.types import *
from .config import configs
from .tool import ToolCall
from .types import *
@@ -39,6 +22,10 @@ class CompletionResponse(BaseModel):
"""The raw response from the model."""
cost: Optional[float] = Field(None, description="The cost of the completion")
"""The cost of the completion."""
usage: Optional[CompletionUsage] = Field(
None, description="The usage of the completion"
)
"""The usage of the completion."""
chunks: List[Union[ModelResponse, ChatCompletionChunk]] = Field(
[], description="The chunks of the response when streaming"
)
@@ -81,7 +68,7 @@ def _post_init(self) -> "CompletionResponse":
def complete_response(self) -> Union[ModelResponse, ChatCompletion]:
"""The complete response from the model. This will block until the response is finished."""
if self.is_finished:
return self._complete_response
return self._complete_response # type: ignore
self.streaming() # ? when we should set display to False?
assert self.is_finished, "Response should be finished after streaming"
return self._complete_response # type: ignore
@@ -194,11 +181,16 @@ def _finish(self, response: Any) -> None:
return
self.is_finished = True
self._complete_response = response
self.usage = getattr(response, "usage", None)
try:
self.cost = completion_cost(response)
except NotFoundError:
pass
# parse the message and tool calls
if isinstance(response, (ModelResponse, ChatCompletion)):
message = response.choices[0].message
if getattr(message, "tool_calls", None):
for call in message.tool_calls:
if tool_calls := getattr(message, "tool_calls", None):
for call in tool_calls:
self.tool_calls.append(ToolCall.from_openai_tool_call(call))
elif message.content is not None:
self.message = message.content
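To make the new field concrete, here is a small consumer-side sketch (not part of the commit) that reads the attributes touched above; the `CompletionResponse` field names come from this diff, everything else is illustrative:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from appl.core.response import CompletionResponse  # import path taken from this diff


def summarize(response: "CompletionResponse") -> str:
    """Build a one-line summary of a finished completion (illustrative helper)."""
    parts = [f"finished={response.is_finished}"]
    if response.usage is not None:   # new in this commit: provider-reported CompletionUsage
        parts.append(f"usage={response.usage}")
    if response.cost is not None:    # stays None when litellm has no pricing for the model
        parts.append(f"cost=${response.cost:.6f}")
    if response.tool_calls:          # parsed via ToolCall.from_openai_tool_call in _finish
        parts.append(f"tool_calls={len(response.tool_calls)}")
    elif response.message is not None:
        parts.append(f"message={response.message[:40]!r}")
    return ", ".join(parts)
```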
1 change: 1 addition & 0 deletions src/appl/core/types/__init__.py
@@ -1,6 +1,7 @@
from .basic import *
from .content import *
from .custom import *
from .deps import *
from .futures import *
from .role import *

14 changes: 14 additions & 0 deletions src/appl/core/types/deps.py
@@ -0,0 +1,14 @@
"""Import types from dependencies"""

from litellm.types.utils import Delta, Function, ModelResponse
from openai.types import CompletionUsage
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta,
ChoiceDeltaToolCallFunction,
)
2 changes: 2 additions & 0 deletions src/appl/default_configs.yaml
@@ -17,8 +17,10 @@ settings:
log_level: null # default to use the same log level as stdout
display:
configs: false # Display the configurations
configs_update: false # Display the updates of the configurations
llm_raw_call_args: false # Display the raw args for the llm calls
llm_raw_response: false # Display the raw response of the llm calls
llm_raw_usage: false # Display the raw usage of the llm calls
llm_call_args: true # Display the args for the llm calls
llm_response: true # Display the response of the llm calls
llm_cache: false # Display the cache info
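Both new flags default to `false`. Below is a minimal sketch of an override config that turns them on; the file name `appl.yaml` and the assumption that overrides are deep-merged into the defaults (via `configs.update` in `init()`) are mine, not part of the commit:

```yaml
# Hypothetical project-level override; only the keys being changed are listed.
settings:
  logging:
    display:
      configs_update: true   # log applied config overrides (added in v0.0.2)
      llm_raw_usage: true    # log raw token usage of each LLM call (added in v0.0.2)
```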
13 changes: 7 additions & 6 deletions src/appl/servers/api.py
@@ -67,6 +67,7 @@ def chat_completion(**kwargs: Any) -> CompletionResponse:

log_llm_call_args = configs.getattrs("settings.logging.display.llm_raw_call_args")
log_llm_response = configs.getattrs("settings.logging.display.llm_raw_response")
log_llm_usage = configs.getattrs("settings.logging.display.llm_raw_usage")
log_llm_cache = configs.getattrs("settings.logging.display.llm_cache")
if log_llm_call_args:
logger.info(f"Call completion [{gen_id}] with args: {kwargs}")
@@ -94,12 +95,7 @@ def wrapped(**inner_kwargs: Any) -> Tuple[Any, bool]:

def post_completion(response: CompletionResponse) -> None:
raw_response = response.complete_response
cost = 0.0
if not use_cache:
try:
cost = completion_cost(raw_response)
except litellm.exceptions.NotFoundError:
pass
cost = 0.0 if use_cache else response.cost
response.cost = cost # update the cost
add_to_trace(
CompletionResponseEvent(
@@ -108,6 +104,8 @@ def post_completion(response: CompletionResponse) -> None:
)
if log_llm_response:
logger.info(f"Completion [{gen_id}] response: {response}")
if log_llm_usage and response.usage is not None:
logger.info(f"Completion [{gen_id}] usage: {response.usage}")

return CompletionResponse(
raw_response=raw_response, post_finish_callbacks=[post_completion]
@@ -186,6 +184,9 @@ def wrapper(**inner_kwargs: Any) -> CompletionResponse:
# fill in the response_model and response_obj
response.response_model = response_model
response.response_obj = results
# TODO?: update the cost for multiple retries
# instructor has updated the total usage for retries
# ?? response.cost = completion_cost({"usage": response.usage})
else:
response = chat_completion(**kwargs)
return response
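Putting the pieces together, here is a hedged end-to-end sketch of where the new `usage` value surfaces. That `chat_completion` accepts litellm-style keyword arguments is an assumption based on the wrapper above, the model name is a placeholder, and the exact `init()` signature may differ:

```python
import appl
from appl.servers.api import chat_completion  # module path taken from this diff

appl.init()  # load default_configs.yaml plus any user overrides

response = chat_completion(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "Say hi in one word."}],
)
print(response.cost)   # set in _finish via litellm.completion_cost (None if pricing is unknown)
print(response.usage)  # new in this commit; also logged when llm_raw_usage is enabled
```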
