Skip to content

Commit

Permalink
Docker multilanguage executor saver with policy (microsoft#2522)
Browse files Browse the repository at this point in the history
* feat: update executor saver policy

* feat: languages

* feat: add test _cmd

* fix: try catch

* fix: log

* fix: test docker mock

* fix: invalid path test

* fix: invalid path message

* fix: invalid path message

* fix: is_docker test

* fix: delete old test

* fix: cmd lang
  • Loading branch information
asandez1 authored May 1, 2024
1 parent 83f9f3e commit 5fdaf1a
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 22 deletions.
2 changes: 2 additions & 0 deletions autogen/code_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ def _cmd(lang: str) -> str:
return lang
if lang in ["shell"]:
return "sh"
if lang == "javascript":
return "node"
if lang in ["ps1", "pwsh", "powershell"]:
powershell_command = get_powershell_command()
return powershell_command
Expand Down
57 changes: 39 additions & 18 deletions autogen/coding/docker_commandline_code_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path
from time import sleep
from types import TracebackType
from typing import Any, List, Optional, Type, Union
from typing import Any, ClassVar, Dict, List, Optional, Type, Union

import docker
from docker.errors import ImageNotFound
Expand Down Expand Up @@ -39,6 +39,20 @@ def _wait_for_ready(container: Any, timeout: int = 60, stop_time: float = 0.1) -


class DockerCommandLineCodeExecutor(CodeExecutor):
DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
"bash": True,
"shell": True,
"sh": True,
"pwsh": True,
"powershell": True,
"ps1": True,
"python": True,
"javascript": False,
"html": False,
"css": False,
}
LANGUAGE_ALIASES: ClassVar[Dict[str, str]] = {"py": "python", "js": "javascript"}

def __init__(
self,
image: str = "python:3-slim",
Expand All @@ -48,6 +62,7 @@ def __init__(
bind_dir: Optional[Union[Path, str]] = None,
auto_remove: bool = True,
stop_container: bool = True,
execution_policies: Optional[Dict[str, bool]] = None,
):
"""(Experimental) A code executor class that executes code through
a command line environment in a Docker container.
Expand Down Expand Up @@ -80,13 +95,11 @@ def __init__(
Raises:
ValueError: On argument error, or if the container fails to start.
"""

if timeout < 1:
raise ValueError("Timeout must be greater than or equal to 1.")

if isinstance(work_dir, str):
work_dir = Path(work_dir)

work_dir.mkdir(exist_ok=True)

if bind_dir is None:
Expand All @@ -95,7 +108,6 @@ def __init__(
bind_dir = Path(bind_dir)

client = docker.from_env()

# Check if the image exists
try:
client.images.get(image)
Expand Down Expand Up @@ -127,7 +139,6 @@ def cleanup() -> None:
container.stop()
except docker.errors.NotFound:
pass

atexit.unregister(cleanup)

if stop_container:
Expand All @@ -142,6 +153,9 @@ def cleanup() -> None:
self._timeout = timeout
self._work_dir: Path = work_dir
self._bind_dir: Path = bind_dir
self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
if execution_policies is not None:
self.execution_policies.update(execution_policies)

@property
def timeout(self) -> int:
Expand Down Expand Up @@ -179,35 +193,42 @@ def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeRe
files = []
last_exit_code = 0
for code_block in code_blocks:
lang = code_block.language
lang = self.LANGUAGE_ALIASES.get(code_block.language.lower(), code_block.language.lower())
if lang not in self.DEFAULT_EXECUTION_POLICY:
outputs.append(f"Unsupported language {lang}\n")
last_exit_code = 1
break

execute_code = self.execution_policies.get(lang, False)
code = silence_pip(code_block.code, lang)

# Check if there is a filename comment
try:
# Check if there is a filename comment
filename = _get_file_name_from_content(code, Path("/workspace"))
filename = _get_file_name_from_content(code, self._work_dir)
except ValueError:
return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")
outputs.append("Filename is not in the workspace")
last_exit_code = 1
break

if filename is None:
# create a file with an automatically generated name
code_hash = md5(code.encode()).hexdigest()
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
if not filename:
filename = f"tmp_code_{md5(code.encode()).hexdigest()}.{lang}"

code_path = self._work_dir / filename
with code_path.open("w", encoding="utf-8") as fout:
fout.write(code)
files.append(code_path)

command = ["timeout", str(self._timeout), _cmd(lang), filename]
if not execute_code:
outputs.append(f"Code saved to {str(code_path)}\n")
continue

command = ["timeout", str(self._timeout), _cmd(lang), filename]
result = self._container.exec_run(command)
exit_code = result.exit_code
output = result.output.decode("utf-8")
if exit_code == 124:
output += "\n"
output += TIMEOUT_MSG

output += "\n" + TIMEOUT_MSG
outputs.append(output)
files.append(code_path)

last_exit_code = exit_code
if exit_code != 0:
Expand Down
35 changes: 31 additions & 4 deletions test/coding/test_commandline_code_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,18 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
assert file_line.strip() == code_line.strip()


def test_local_commandline_code_executor_save_files() -> None:
@pytest.mark.parametrize("cls", classes_to_test)
def test_local_commandline_code_executor_save_files(cls) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
executor = cls(work_dir=temp_dir)
_test_save_files(executor, save_file_only=False)


def test_local_commandline_code_executor_save_files_only() -> None:
@pytest.mark.parametrize("cls", classes_to_test)
def test_local_commandline_code_executor_save_files_only(cls) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
# Using execution_policies to specify that no languages should execute
executor = LocalCommandLineCodeExecutor(
executor = cls(
work_dir=temp_dir,
execution_policies={"python": False, "bash": False, "javascript": False, "html": False, "css": False},
)
Expand Down Expand Up @@ -255,6 +257,31 @@ def test_docker_commandline_code_executor_restart() -> None:
assert result.exit_code == 0


@pytest.mark.skipif(
    skip_docker_test,
    reason="docker is not running or requested to skip docker tests",
)
def test_policy_override():
    """Check that caller-supplied execution policies override the class defaults.

    An executor is built with a partial policy mapping. The test then verifies
    that the overridden languages carry the caller's values, that every other
    language keeps the value from ``DEFAULT_EXECUTION_POLICY``, and that no
    languages beyond the default set appear in the resulting policy.
    """
    defaults = DockerCommandLineCodeExecutor.DEFAULT_EXECUTION_POLICY
    overrides = {"python": False, "javascript": True}

    executor = DockerCommandLineCodeExecutor(execution_policies=overrides)
    effective = executor.execution_policies

    # Languages named in the override mapping take the caller's setting.
    assert not effective["python"], "Python execution should be disabled"
    assert effective["javascript"], "JavaScript execution should be enabled"

    # Languages the caller did not mention must keep their default setting.
    for lang in defaults:
        if lang in overrides:
            continue
        assert effective[lang] == defaults[lang], f"Policy for {lang} should not be changed"

    # The override mapping must not introduce or drop any language keys.
    assert set(effective) == set(defaults), "Execution policies should only contain known languages"


def _test_restart(executor: CodeExecutor) -> None:
# Check warning.
with pytest.warns(UserWarning, match=r".*No action is taken."):
Expand Down

0 comments on commit 5fdaf1a

Please sign in to comment.