Docker multilanguage executor saver with policy (microsoft#2522)

* feat: update executor saver policy
* feat: languages
* feat: add test _cmd
* fix: try catch
* fix: log
* fix: test docker mock
* fix: invalid path test
* fix: invalid path message
* fix: invalid path message
* fix: is_docker test
* fix: delete old test
* fix: cmd lang
websmyths committed on May 1, 2024 · 5fdaf1a
1 parent 83f9f3e
commit 5fdaf1a
Show file tree

Hide file tree

Showing 3 changed files with 72 additions and 22 deletions.
diff --git a/autogen/code_utils.py b/autogen/code_utils.py
@@ -251,6 +251,8 @@ def _cmd(lang: str) -> str:
         return lang
     if lang in ["shell"]:
         return "sh"
+    if lang == "javascript":
+        return "node"
     if lang in ["ps1", "pwsh", "powershell"]:
         powershell_command = get_powershell_command()
         return powershell_command

diff --git a/autogen/coding/docker_commandline_code_executor.py b/autogen/coding/docker_commandline_code_executor.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from time import sleep
 from types import TracebackType
-from typing import Any, List, Optional, Type, Union
+from typing import Any, ClassVar, Dict, List, Optional, Type, Union
 
 import docker
 from docker.errors import ImageNotFound
@@ -39,6 +39,20 @@ def _wait_for_ready(container: Any, timeout: int = 60, stop_time: float = 0.1) -
 
 
 class DockerCommandLineCodeExecutor(CodeExecutor):
+    DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
+        "bash": True,
+        "shell": True,
+        "sh": True,
+        "pwsh": True,
+        "powershell": True,
+        "ps1": True,
+        "python": True,
+        "javascript": False,
+        "html": False,
+        "css": False,
+    }
+    LANGUAGE_ALIASES: ClassVar[Dict[str, str]] = {"py": "python", "js": "javascript"}
+
     def __init__(
         self,
         image: str = "python:3-slim",
@@ -48,6 +62,7 @@ def __init__(
         bind_dir: Optional[Union[Path, str]] = None,
         auto_remove: bool = True,
         stop_container: bool = True,
+        execution_policies: Optional[Dict[str, bool]] = None,
     ):
         """(Experimental) A code executor class that executes code through
         a command line environment in a Docker container.
@@ -80,13 +95,11 @@ def __init__(
         Raises:
             ValueError: On argument error, or if the container fails to start.
         """
-
         if timeout < 1:
             raise ValueError("Timeout must be greater than or equal to 1.")
 
         if isinstance(work_dir, str):
             work_dir = Path(work_dir)
-
         work_dir.mkdir(exist_ok=True)
 
         if bind_dir is None:
@@ -95,7 +108,6 @@ def __init__(
             bind_dir = Path(bind_dir)
 
         client = docker.from_env()
-
         # Check if the image exists
         try:
             client.images.get(image)
@@ -127,7 +139,6 @@ def cleanup() -> None:
                 container.stop()
             except docker.errors.NotFound:
                 pass
-
             atexit.unregister(cleanup)
 
         if stop_container:
@@ -142,6 +153,9 @@ def cleanup() -> None:
         self._timeout = timeout
         self._work_dir: Path = work_dir
         self._bind_dir: Path = bind_dir
+        self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
+        if execution_policies is not None:
+            self.execution_policies.update(execution_policies)
 
     @property
     def timeout(self) -> int:
@@ -179,35 +193,42 @@ def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeRe
         files = []
         last_exit_code = 0
         for code_block in code_blocks:
-            lang = code_block.language
+            lang = self.LANGUAGE_ALIASES.get(code_block.language.lower(), code_block.language.lower())
+            if lang not in self.DEFAULT_EXECUTION_POLICY:
+                outputs.append(f"Unsupported language {lang}\n")
+                last_exit_code = 1
+                break
+
+            execute_code = self.execution_policies.get(lang, False)
             code = silence_pip(code_block.code, lang)
 
+            # Check if there is a filename comment
             try:
-                # Check if there is a filename comment
-                filename = _get_file_name_from_content(code, Path("/workspace"))
+                filename = _get_file_name_from_content(code, self._work_dir)
             except ValueError:
-                return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")
+                outputs.append("Filename is not in the workspace")
+                last_exit_code = 1
+                break
 
-            if filename is None:
-                # create a file with an automatically generated name
-                code_hash = md5(code.encode()).hexdigest()
-                filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
+            if not filename:
+                filename = f"tmp_code_{md5(code.encode()).hexdigest()}.{lang}"
 
             code_path = self._work_dir / filename
             with code_path.open("w", encoding="utf-8") as fout:
                 fout.write(code)
+            files.append(code_path)
 
-            command = ["timeout", str(self._timeout), _cmd(lang), filename]
+            if not execute_code:
+                outputs.append(f"Code saved to {str(code_path)}\n")
+                continue
 
+            command = ["timeout", str(self._timeout), _cmd(lang), filename]
             result = self._container.exec_run(command)
             exit_code = result.exit_code
             output = result.output.decode("utf-8")
             if exit_code == 124:
-                output += "\n"
-                output += TIMEOUT_MSG
-
+                output += "\n" + TIMEOUT_MSG
             outputs.append(output)
-            files.append(code_path)
 
             last_exit_code = exit_code
             if exit_code != 0:

diff --git a/test/coding/test_commandline_code_executor.py b/test/coding/test_commandline_code_executor.py
@@ -143,16 +143,18 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
             assert file_line.strip() == code_line.strip()
 
 
-def test_local_commandline_code_executor_save_files() -> None:
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_local_commandline_code_executor_save_files(cls) -> None:
     with tempfile.TemporaryDirectory() as temp_dir:
-        executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
+        executor = cls(work_dir=temp_dir)
         _test_save_files(executor, save_file_only=False)
 
 
-def test_local_commandline_code_executor_save_files_only() -> None:
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_local_commandline_code_executor_save_files_only(cls) -> None:
     with tempfile.TemporaryDirectory() as temp_dir:
         # Using execution_policies to specify that no languages should execute
-        executor = LocalCommandLineCodeExecutor(
+        executor = cls(
             work_dir=temp_dir,
             execution_policies={"python": False, "bash": False, "javascript": False, "html": False, "css": False},
         )
@@ -255,6 +257,31 @@ def test_docker_commandline_code_executor_restart() -> None:
         assert result.exit_code == 0
 
 
+@pytest.mark.skipif(
+    skip_docker_test,
+    reason="docker is not running or requested to skip docker tests",
+)
+def test_policy_override():
+    default_policy = DockerCommandLineCodeExecutor.DEFAULT_EXECUTION_POLICY
+    custom_policy = {
+        "python": False,
+        "javascript": True,
+    }
+
+    executor = DockerCommandLineCodeExecutor(execution_policies=custom_policy)
+
+    assert not executor.execution_policies["python"], "Python execution should be disabled"
+    assert executor.execution_policies["javascript"], "JavaScript execution should be enabled"
+
+    for lang, should_execute in default_policy.items():
+        if lang not in custom_policy:
+            assert executor.execution_policies[lang] == should_execute, f"Policy for {lang} should not be changed"
+
+    assert set(executor.execution_policies.keys()) == set(
+        default_policy.keys()
+    ), "Execution policies should only contain known languages"
+
+
 def _test_restart(executor: CodeExecutor) -> None:
     # Check warning.
     with pytest.warns(UserWarning, match=r".*No action is taken."):