Skip to content

Commit

Permalink
fix safeguard
Browse files Browse the repository at this point in the history
  • Loading branch information
vyokky committed Jul 5, 2024
1 parent 3e99295 commit 0666afb
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 107 deletions.
16 changes: 16 additions & 0 deletions ufo/agents/agent/app_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ufo.agents.states.app_agent_state import AppAgentStatus, ContinueAppAgentState
from ufo.automator import puppeteer
from ufo.config.config import Config
from ufo.module import interactor
from ufo.module.context import Context
from ufo.prompter.agent_prompter import AppAgentPrompter

Expand Down Expand Up @@ -301,6 +302,21 @@ def create_puppteer_interface(self) -> puppeteer.AppPuppeteer:
"""
return puppeteer.AppPuppeteer(self._process_name, self._app_root_name)

def process_comfirmation(self) -> bool:
    """
    Ask the user whether the pending action may be carried out.
    The action and control text are read from the agent's processor and
    handed to the interactor's sensitive-step prompt.
    :return: The decision returned by the prompt.
    """
    decision = interactor.sensitive_step_asker(
        self.processor.action, self.processor.control_text
    )

    if decision:
        return decision

    # The prompt returned a falsy decision: report the cancellation before
    # handing the decision back to the caller.
    utils.print_with_color("The user has canceled the action.", "red")
    return decision

@property
def status_manager(self) -> AppAgentStatus:
"""
Expand Down
7 changes: 7 additions & 0 deletions ufo/agents/agent/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,13 @@ def process_asker(self) -> None:

self.blackboard.add_questions(qa_pair)

@abstractmethod
def process_comfirmation(self) -> None:
    """
    Handle a confirmation request for the agent.
    Declared abstract; the base implementation itself does nothing.
    NOTE(review): the AppAgent override is annotated to return a bool
    decision - confirm the intended return type of this hook.
    """

@property
def processor(self) -> BaseProcessor:
"""
Expand Down
6 changes: 6 additions & 0 deletions ufo/agents/agent/evaluation_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ def evaluate(self, request: str, log_path: str) -> Tuple[Dict[str, str], float]:

return result, cost

def process_comfirmation(self) -> None:
    """
    Confirmation hook for the evaluation agent; currently a no-op.
    """
    return None

def print_response(self, response_dict: Dict[str, Any]) -> None:
"""
Print the response of the evaluation.
Expand Down
6 changes: 6 additions & 0 deletions ufo/agents/agent/host_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,12 @@ def process(self, context: Context) -> None:
self.processor.process()
self.status = self.processor.status

def process_comfirmation(self) -> None:
    """
    Confirmation hook for the host agent; currently a no-op.
    TODO: Process the confirmation.
    """
    return None

def print_response(self, response_dict: Dict) -> None:
"""
Print the response.
Expand Down
18 changes: 0 additions & 18 deletions ufo/agents/processors/app_agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,9 @@ def __init__(self, agent: "AppAgent", context: Context) -> None:
self._operation = None
self._args = None
self._image_url = []
self._plan = []
self.action = ""
self.control_filter_factory = ControlFilterFactory()
self.filtered_annotation_dict = None

@property
def plan(self) -> List[str]:
    """
    Get the plan.
    :return: The plan stored on the processor, as a list of strings.
    """
    # The backing field is initialised to an empty list and the setter
    # accepts List[str], so the getter is annotated with the same type.
    return self._plan

@plan.setter
def plan(self, plan: List[str]) -> None:
    """
    Set the plan.
    :param plan: The plan to store, as a list of strings.
    """
    self._plan = plan

@property
def action(self) -> str:
"""
Expand Down
34 changes: 34 additions & 0 deletions ufo/agents/processors/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def __init__(self, agent: BasicAgent, context: Context) -> None:
self._question_list = []
self._agent_status_manager = self.agent.status_manager
self._is_resumed = False
self._action = None
self._plan = None

def process(self) -> None:
"""
Expand Down Expand Up @@ -418,6 +420,38 @@ def status(self) -> str:
"""
return self._status

@property
def action(self) -> str:
    """
    Read the action currently stored on the processor.
    :return: The value held in the private ``_action`` attribute.
    """
    return self._action

@action.setter
def action(self, action: str) -> None:
    """
    Store a new action on the processor.
    :param action: The action to keep in the private ``_action`` attribute.
    """
    self._action = action

@property
def plan(self) -> str:
    """
    Read the plan of the agent.
    NOTE(review): annotated as str, but the host agent processor assigns
    the result of string2list to this property - confirm the intended type.
    :return: The value held in the private ``_plan`` attribute.
    """
    return self._plan

@plan.setter
def plan(self, plan: str) -> None:
    """
    Store a new plan for the agent.
    :param plan: The plan to keep in the private ``_plan`` attribute.
    """
    self._plan = plan

@property
def log_path(self) -> str:
"""
Expand Down
8 changes: 5 additions & 3 deletions ufo/agents/processors/host_agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ def parse_response(self) -> None:
self.host_message = self._response_json.get("Message", [])

# Convert the plan from a string to a list if the plan is a string.
self._plan = self.string2list(self._response_json.get("Plan", ""))
self._response_json["Plan"] = self._plan
self.plan = self.string2list(self._response_json.get("Plan", ""))
self._response_json["Plan"] = self.plan

self.status = self._response_json.get("Status", "")
self.question_list = self._response_json.get("Questions", [])
Expand Down Expand Up @@ -185,6 +185,8 @@ def execute_action(self) -> None:
self.application_window.set_focus()
self.application_window.draw_outline(colour="red", thickness=3)

self.action = "set_focus()"

def is_window_interface_available(self, new_app_window: UIAWrapper) -> bool:
"""
Check if the window interface is available for the visual element.
Expand Down Expand Up @@ -235,7 +237,7 @@ def update_memory(self) -> None:
"Round": self.round_num,
"ControlLabel": self.control_text,
"SubtaskIndex": -1,
"Action": "set_focus()",
"Action": self.action,
"ActionType": "UIControl",
"Request": self.request,
"Agent": "HostAgent",
Expand Down
93 changes: 10 additions & 83 deletions ufo/agents/states/app_agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from enum import Enum
from typing import TYPE_CHECKING, Dict, Optional, Type

from ufo import utils
from ufo.agents.agent.basic import BasicAgent
from ufo.agents.states.basic import AgentState, AgentStateManager
from ufo.agents.states.host_agent_state import (
Expand All @@ -15,7 +14,6 @@
NoneHostAgentState,
)
from ufo.config.config import Config
from ufo.module import interactor
from ufo.module.context import Context, ContextNames

# Avoid circular import
Expand All @@ -39,7 +37,6 @@ class AppAgentStatus(Enum):
FAIL = "FAIL"
PENDING = "PENDING"
CONFIRM = "CONFIRM"
SWITCH = "SWITCH"
SCREENSHOT = "SCREENSHOT"


Expand Down Expand Up @@ -245,54 +242,6 @@ def is_subtask_end(self) -> bool:
return False


@AppAgentStateManager.register
class SwitchAppAgentState(AppAgentState):
    """
    The app agent state registered under the SWITCH status.
    Handling it archives the current subtask; the next step is given to the
    host agent in its continue state.
    """

    @classmethod
    def name(cls) -> str:
        """
        The name under which this state is registered.
        :return: The value of AppAgentStatus.SWITCH.
        """
        return AppAgentStatus.SWITCH.value

    def handle(self, agent: "AppAgent", context: Optional["Context"] = None) -> None:
        """
        Handle the current step by archiving the subtask.
        :param agent: The agent for the current step (not used here).
        :param context: The context for the agent and session.
        """
        self.archive_subtask(context)

    def next_agent(self, agent: "AppAgent") -> BasicAgent:
        """
        Pick the agent that runs the next step.
        :param agent: The agent for the current step.
        :return: The host of the given agent.
        """
        return agent.host

    def next_state(self, agent: "AppAgent") -> HostAgentState:
        """
        Pick the state for the next step.
        :param agent: The agent for the current step (not used here).
        :return: A new ContinueHostAgentState instance.
        """
        return ContinueHostAgentState()

    def is_subtask_end(self) -> bool:
        """
        Tell whether the subtask ends in this state.
        :return: Always True.
        """
        return True


@AppAgentStateManager.register
class PendingAppAgentState(AppAgentState):
"""
Expand Down Expand Up @@ -360,23 +309,17 @@ def handle(self, agent: "AppAgent", context: Optional["Context"] = None) -> None

return

agent_processor = agent.processor

if agent_processor is None:
utils.print_with_color("The agent processor is None.", "red")
return

# Get the action and control text from the agent processor to ask the user whether to proceed with the action.
action = agent_processor.action
control_text = agent_processor.control_text

self._confirm = self.user_confirm(action=action, control_text=control_text)

self._confirm = agent.process_comfirmation()
# If the user confirms the action, the agent should resume the task.
if self._confirm:
agent.process_resume()

def next_state(self, agent: AppAgent) -> AppAgentState:
"""
Get the next state of the agent.
:param agent: The agent for the current step.
:return: The state for the next step.
"""

plan = agent.processor.plan

Expand All @@ -391,23 +334,7 @@ def next_state(self, agent: AppAgent) -> AppAgentState:
return ContinueAppAgentState()
else:
agent.status = AppAgentStatus.FINISH.value
return FinishAppAgentState()

def user_confirm(self, action: str, control_text: str) -> bool:
    """
    Ask the user whether to go ahead with the action when the status is CONFIRM.
    :param action: The action that needs confirmation.
    :param control_text: The control text the action applies to.
    :return: True when the user approves the action, False otherwise.
    """
    # Approved: nothing to report, the caller may proceed.
    if interactor.sensitive_step_asker(action, control_text):
        return True

    # Declined: tell the user the task will stop.
    utils.print_with_color("The user decide to stop the task.", "magenta")
    return False
return FinishHostAgentState()

def is_subtask_end(self) -> bool:
"""
Expand Down Expand Up @@ -454,7 +381,7 @@ def next_state(self, agent: "AppAgent") -> HostAgentState:
:param agent: The agent for the current step.
:return: The state for the next step.
"""
return FinishHostAgentState
return FinishHostAgentState()

def is_round_end(self) -> bool:
"""
Expand Down Expand Up @@ -508,7 +435,7 @@ def next_state(self, agent: "AppAgent") -> HostAgentState:
:param agent: The agent for the current step.
:return: The state for the next step.
"""
return FinishHostAgentState
return FinishHostAgentState()

def is_round_end(self) -> bool:
"""
Expand Down Expand Up @@ -553,7 +480,7 @@ def next_state(self, agent: "AppAgent") -> HostAgentState:
:param agent: The agent for the current step.
:return: The state for the next step.
"""
return NoneHostAgentState
return NoneHostAgentState()

def is_subtask_end(self) -> bool:
"""
Expand Down
9 changes: 7 additions & 2 deletions ufo/automator/ui_control/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,17 @@ def set_edit_text(self, params: Dict[str, str]) -> str:
:return: The result of the set edit text action.
"""

text = params.get("text", "")

if configs["INPUT_TEXT_API"] == "set_text":
method_name = "set_edit_text"
args = {"text": params["text"]}
args = {"text": text}
else:
method_name = "type_keys"
args = {"keys": params["text"], "pause": 0.1, "with_spaces": True}
text = text.replace("\n", "{ENTER}")
text = text.replace("\t", "{TAB}")

args = {"keys": text, "pause": 0.1, "with_spaces": True}
try:
result = self.atomic_execution(method_name, args)
if (
Expand Down
2 changes: 1 addition & 1 deletion ufo/config/config_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ MAX_STEP: 100 # The max step limit for completing the user request
SLEEP_TIME: 5 # The sleep time between each step to wait for the window to be ready
RECTANGLE_TIME: 1

SAFE_GUARD: True # Whether to use the safe guard to prevent the model from doing sensitive operations.
SAFE_GUARD: False # Whether to use the safe guard to prevent the model from doing sensitive operations.
CONTROL_LIST: ["Button", "Edit", "TabItem", "Document", "ListItem", "MenuItem", "ScrollBar", "TreeItem", "Hyperlink", "ComboBox", "RadioButton", "DataItem"]
# The list of widgets that are allowed to be selected. In the uia backend, it is used to filter by control_type, while in the win32 backend, it is used to filter by class_name.
HISTORY_KEYS: ["Step", "Thought", "ControlText", "Subtask", "Action", "Comment", "Results", "UserConfirm"] # The keys of the action history for the next step.
Expand Down

0 comments on commit 0666afb

Please sign in to comment.