Add permanent storage option for EventStream (All-Hands-AI#1697)

* add storage classes * add minio * add event stream storage * storage test working * use fixture * event stream test passing * better serialization * factor out serialization pkg * move more serialization * fix tests * fix test * remove __all__ * add rehydration test * add more rehydration test * fix fixture * fix dict init * update tests * lock * regenerate tests * Update opendevin/events/stream.py * revert tests * revert old integration tests * only add fields if present * regen tests * pin pyarrow * fix unit tests * remove cause from memories * revert tests * regen tests
kernelai · May 14, 2024 · dcb5d1c · dcb5d1c
1 parent beb74a1
commit dcb5d1c
Show file tree

Hide file tree

Showing 73 changed files with 1,022 additions and 809 deletions.
diff --git a/.gitignore b/.gitignore
@@ -204,3 +204,4 @@ cache
 config.toml
 
 test_results*
+/_test_files_tmp/
diff --git a/agenthub/SWE_agent/agent.py b/agenthub/SWE_agent/agent.py
@@ -7,6 +7,7 @@
     MessageAction,
 )
 from opendevin.events.observation import Observation
+from opendevin.events.serialization.event import event_to_memory
 from opendevin.llm.llm import LLM
 
 from .parser import parse_command
@@ -36,7 +37,7 @@ def __init__(self, llm: LLM):
 
     def _remember(self, action: Action, observation: Observation) -> None:
         """Agent has a limited memory of the few steps implemented as a queue"""
-        memory = MEMORY_FORMAT(action.to_memory(), observation.to_memory())
+        memory = MEMORY_FORMAT(event_to_memory(action), event_to_memory(observation))
         self.running_memory.append(memory)
 
     def _think_act(self, messages: list[dict]) -> tuple[Action, str]:

diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py
@@ -24,6 +24,7 @@
     NullObservation,
     Observation,
 )
+from opendevin.events.serialization.event import event_to_dict
 from opendevin.llm.llm import LLM
 
 """
@@ -138,8 +139,8 @@ def step(self, state: State) -> Action:
                 expected_observations = prev_step['observations']
                 hist_start = len(state.history) - len(expected_observations)
                 for i in range(len(expected_observations)):
-                    hist_obs = state.history[hist_start + i][1].to_dict()
-                    expected_obs = expected_observations[i].to_dict()
+                    hist_obs = event_to_dict(state.history[hist_start + i][1])
+                    expected_obs = event_to_dict(expected_observations[i])
                     if (
                         'command_id' in hist_obs['extras']
                         and hist_obs['extras']['command_id'] != -1

diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py
@@ -3,7 +3,8 @@
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.core.utils import json
-from opendevin.events.action import Action, action_from_dict
+from opendevin.events.action import Action
+from opendevin.events.serialization.action import action_from_dict
 from opendevin.llm.llm import LLM
 
 from .instructions import instructions

diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py
@@ -22,6 +22,7 @@
     NullObservation,
     Observation,
 )
+from opendevin.events.serialization.event import event_to_memory
 from opendevin.llm.llm import LLM
 from opendevin.memory.condenser import MemoryCondenser
 from opendevin.memory.history import ShortTermHistory
@@ -186,7 +187,7 @@ def _add_initial_thoughts(self, task):
                     observation = BrowserOutputObservation(
                         content=thought, url='', screenshot=''
                     )
-                self._add_event(observation.to_memory())
+                self._add_event(event_to_memory(observation))
                 previous_action = ''
             else:
                 action: Action = NullAction()
@@ -213,7 +214,7 @@ def _add_initial_thoughts(self, task):
                     previous_action = ActionType.BROWSE
                 else:
                     action = MessageAction(thought)
-                self._add_event(action.to_memory())
+                self._add_event(event_to_memory(action))
 
     def step(self, state: State) -> Action:
         """
@@ -229,8 +230,8 @@ def step(self, state: State) -> Action:
         goal = state.get_current_user_intent()
         self._initialize(goal)
         for prev_action, obs in state.updated_info:
-            self._add_event(prev_action.to_memory())
-            self._add_event(obs.to_memory())
+            self._add_event(event_to_memory(prev_action))
+            self._add_event(event_to_memory(obs))
 
         state.updated_info = []
 

diff --git a/agenthub/monologue_agent/utils/prompts.py b/agenthub/monologue_agent/utils/prompts.py
@@ -2,11 +2,11 @@
 from opendevin.core.utils import json
 from opendevin.events.action import (
     Action,
-    action_from_dict,
 )
 from opendevin.events.observation import (
     CmdOutputObservation,
 )
+from opendevin.events.serialization.action import action_from_dict
 
 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is this:

diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py
@@ -5,11 +5,12 @@
 from opendevin.events.action import (
     Action,
     NullAction,
-    action_from_dict,
 )
 from opendevin.events.observation import (
     NullObservation,
 )
+from opendevin.events.serialization.action import action_from_dict
+from opendevin.events.serialization.event import event_to_memory
 
 HISTORY_SIZE = 10
 
@@ -139,10 +140,10 @@ def get_prompt(state: State) -> str:
     latest_action: Action = NullAction()
     for action, observation in sub_history:
         if not isinstance(action, NullAction):
-            history_dicts.append(action.to_memory())
+            history_dicts.append(event_to_memory(action))
             latest_action = action
         if not isinstance(observation, NullObservation):
-            observation_dict = observation.to_memory()
+            observation_dict = event_to_memory(observation)
             history_dicts.append(observation_dict)
     history_str = json.dumps(history_dicts, indent=2)
     current_task = state.root_task.get_current_task()
@@ -152,7 +153,7 @@ def get_prompt(state: State) -> str:
             plan_status += "\nIf it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW."
     else:
         plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
-    hint = get_hint(latest_action.to_dict()['action'])
+    hint = get_hint(event_to_memory(latest_action).get('action', ''))
     logger.info('HINT:\n' + hint, extra={'msg_type': 'INFO'})
     task = state.get_current_user_intent()
     return prompt % {

diff --git a/opendevin/core/config.py b/opendevin/core/config.py
@@ -71,6 +71,8 @@ class AppConfig(metaclass=Singleton):
     llm: LLMConfig = field(default_factory=LLMConfig)
     agent: AgentConfig = field(default_factory=AgentConfig)
     runtime: str = 'server'
+    file_store: str = 'memory'
+    file_store_path: str = '/tmp/file_store'
     workspace_base: str = os.getcwd()
     workspace_mount_path: str = os.getcwd()
     workspace_mount_path_in_sandbox: str = '/workspace'

diff --git a/opendevin/core/main.py b/opendevin/core/main.py
@@ -76,7 +76,7 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
     AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
     agent = AgentCls(llm=llm)
 
-    event_stream = EventStream()
+    event_stream = EventStream('main')
     controller = AgentController(
         agent=agent,
         max_iterations=args.max_iterations,

diff --git a/opendevin/events/action/__init__.py b/opendevin/events/action/__init__.py
@@ -1,5 +1,3 @@
-from opendevin.core.exceptions import AgentMalformedActionError
-
 from .action import Action
 from .agent import (
     AgentDelegateAction,
@@ -16,49 +14,6 @@
 from .message import MessageAction
 from .tasks import AddTaskAction, ModifyTaskAction
 
-actions = (
-    CmdKillAction,
-    CmdRunAction,
-    IPythonRunCellAction,
-    BrowseURLAction,
-    FileReadAction,
-    FileWriteAction,
-    AgentRecallAction,
-    AgentFinishAction,
-    AgentRejectAction,
-    AgentDelegateAction,
-    AddTaskAction,
-    ModifyTaskAction,
-    ChangeAgentStateAction,
-    MessageAction,
-)
-
-ACTION_TYPE_TO_CLASS = {action_class.action: action_class for action_class in actions}  # type: ignore[attr-defined]
-
-
-def action_from_dict(action: dict) -> Action:
-    if not isinstance(action, dict):
-        raise AgentMalformedActionError('action must be a dictionary')
-    action = action.copy()
-    if 'action' not in action:
-        raise AgentMalformedActionError(f"'action' key is not found in {action=}")
-    if not isinstance(action['action'], str):
-        raise AgentMalformedActionError(
-            f"'{action['action']=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}"
-        )
-    action_class = ACTION_TYPE_TO_CLASS.get(action['action'])
-    if action_class is None:
-        raise AgentMalformedActionError(
-            f"'{action['action']=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}"
-        )
-    args = action.get('args', {})
-    try:
-        decoded_action = action_class(**args)
-    except TypeError:
-        raise AgentMalformedActionError(f'action={action} has the wrong arguments')
-    return decoded_action
-
-
 __all__ = [
     'Action',
     'NullAction',

diff --git a/opendevin/events/action/action.py b/opendevin/events/action/action.py
@@ -7,11 +7,3 @@
 @dataclass
 class Action(Event):
     runnable: ClassVar[bool] = False
-
-    def to_memory(self):
-        d = super().to_memory()
-        try:
-            v = d.pop('action')
-        except KeyError:
-            raise NotImplementedError(f'{self=} does not have action attribute set')
-        return {'action': v, 'args': d}
diff --git a/opendevin/events/action/commands.py b/opendevin/events/action/commands.py
@@ -35,10 +35,10 @@ class CmdKillAction(Action):
 
     @property
     def message(self) -> str:
-        return f'Killing command: {self.id}'
+        return f'Killing command: {self.command_id}'
 
     def __str__(self) -> str:
-        return f'**CmdKillAction**\n{self.id}'
+        return f'**CmdKillAction**\n{self.command_id}'
 
 
 @dataclass

diff --git a/opendevin/events/event.py b/opendevin/events/event.py
@@ -1,5 +1,5 @@
 import datetime
-from dataclasses import asdict, dataclass
+from dataclasses import dataclass
 from enum import Enum
 from typing import Optional
 
@@ -11,16 +11,6 @@ class EventSource(str, Enum):
 
 @dataclass
 class Event:
-    def to_memory(self):
-        return asdict(self)
-
-    def to_dict(self):
-        d = self.to_memory()
-        if self.source:
-            d['source'] = self.source
-        d['message'] = self.message
-        return d
-
     @property
     def message(self) -> str:
         if hasattr(self, '_message'):

diff --git a/opendevin/events/observation/__init__.py b/opendevin/events/observation/__init__.py
@@ -9,40 +9,6 @@
 from .recall import AgentRecallObservation
 from .success import SuccessObservation
 
-observations = (
-    CmdOutputObservation,
-    BrowserOutputObservation,
-    FileReadObservation,
-    FileWriteObservation,
-    AgentRecallObservation,
-    AgentDelegateObservation,
-    SuccessObservation,
-    ErrorObservation,
-    AgentStateChangedObservation,
-)
-
-OBSERVATION_TYPE_TO_CLASS = {
-    observation_class.observation: observation_class  # type: ignore[attr-defined]
-    for observation_class in observations
-}
-
-
-def observation_from_dict(observation: dict) -> Observation:
-    observation = observation.copy()
-    if 'observation' not in observation:
-        raise KeyError(f"'observation' key is not found in {observation=}")
-    observation_class = OBSERVATION_TYPE_TO_CLASS.get(observation['observation'])
-    if observation_class is None:
-        raise KeyError(
-            f"'{observation['observation']=}' is not defined. Available observations: {OBSERVATION_TYPE_TO_CLASS.keys()}"
-        )
-    observation.pop('observation')
-    observation.pop('message', None)
-    content = observation.pop('content', '')
-    extras = observation.pop('extras', {})
-    return observation_class(content=content, **extras)
-
-
 __all__ = [
     'Observation',
     'NullObservation',
@@ -54,4 +20,6 @@ def observation_from_dict(observation: dict) -> Observation:
     'AgentRecallObservation',
     'ErrorObservation',
     'AgentStateChangedObservation',
+    'AgentDelegateObservation',
+    'SuccessObservation',
 ]
diff --git a/opendevin/events/observation/browse.py b/opendevin/events/observation/browse.py
@@ -1,7 +1,6 @@
 from dataclasses import dataclass, field
 
 from opendevin.core.schema import ObservationType
-from opendevin.events.utils import remove_fields
 
 from .observation import Observation
 
@@ -25,29 +24,6 @@ class BrowserOutputObservation(Observation):
     last_browser_action: str = ''
     focused_element_bid: str = ''
 
-    def to_dict(self):
-        dictionary = super().to_dict()
-        # add screenshot for frontend showcase only, not for agent consumption
-        dictionary['screenshot'] = self.screenshot
-        return dictionary
-
-    def to_memory(self) -> dict:
-        memory_dict = super().to_memory()
-        # remove some fields from the memory, as currently they are too big for LLMs
-        remove_fields(
-            memory_dict['extras'],
-            {
-                'screenshot',
-                'dom_object',
-                'axtree_object',
-                'open_pages_urls',
-                'active_page_index',
-                'last_browser_action',
-                'focused_element_bid',
-            },
-        )
-        return memory_dict
-
     @property
     def message(self) -> str:
         return 'Visited ' + self.url
diff --git a/opendevin/events/observation/observation.py b/opendevin/events/observation/observation.py
@@ -6,14 +6,3 @@
 @dataclass
 class Observation(Event):
     content: str
-
-    def to_memory(self) -> dict:
-        """Converts the observation to a dictionary."""
-        extras = super().to_memory()
-        content = extras.pop('content', '')
-        observation = extras.pop('observation', '')
-        return {
-            'observation': observation,
-            'content': content,
-            'extras': extras,
-        }
diff --git a/opendevin/events/serialization/__init__.py b/opendevin/events/serialization/__init__.py
@@ -0,0 +1,21 @@
+from .action import (
+    action_from_dict,
+)
+from .event import (
+    event_from_dict,
+    event_to_dict,
+    event_to_json,
+    event_to_memory,
+)
+from .observation import (
+    observation_from_dict,
+)
+
+__all__ = [
+    'action_from_dict',
+    'event_from_dict',
+    'event_to_dict',
+    'event_to_json',
+    'event_to_memory',
+    'observation_from_dict',
+]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -204,3 +204,4 @@ cache
		config.toml

		test_results*
		/_test_files_tmp/