Commit b549eab

Author: Luke Hinds (committed)

vLLM Provider

1 parent 189aee9 · commit b549eab

17 files changed: +322 −101 lines changed

config.yaml.example

Lines changed: 37 additions & 14 deletions
@@ -1,19 +1,42 @@
-# Example configuration file
-# Copy this file to config.yaml and modify as needed
+# Codegate Example Configuration
 
-# Server configuration
-port: 8989
-host: "localhost"
+# Network settings
+port: 8989  # Port to listen on (1-65535)
+host: "localhost"  # Host to bind to (use localhost for all interfaces)
 
 # Logging configuration
-log_level: "INFO"  # ERROR, WARNING, INFO, DEBUG
-log_format: "JSON"  # JSON, TEXT
+log_level: "INFO"  # One of: ERROR, WARNING, INFO, DEBUG
 
-# Prompts configuration
-# Option 1: Define prompts directly in the config file
-prompts:
-  my_system_prompt: "Custom system prompt defined in config"
-  another_prompt: "Another custom prompt"
+# Note: This configuration can be overridden by:
+# 1. CLI arguments (--port, --host, --log-level)
+# 2. Environment variables (CODEGATE_APP_PORT, CODEGATE_APP_HOST, CODEGATE_APP_LOG_LEVEL)
 
-# Option 2: Reference a separate prompts file
-# prompts: "prompts.yaml"  # Path to prompts file (relative to config file or absolute)
+# Provider URLs
+provider_urls:
+  openai: "https://api.openai.com/v1"
+  anthropic: "https://api.anthropic.com/v1"
+  vllm: "http://localhost:8000"  # Base URL without /v1 path, it will be added automatically
+
+# Note: Provider URLs can be overridden by environment variables:
+#   CODEGATE_PROVIDER_OPENAI_URL
+#   CODEGATE_PROVIDER_ANTHROPIC_URL
+#   CODEGATE_PROVIDER_VLLM_URL
+# Or by CLI flags:
+#   --vllm-url
+#   --openai-url
+#   --anthropic-url
+
+# Embedding model configuration
+
+####
+# Inference model configuration
+##
+
+# Model to use for chatting
+chat_model_path: "./models/qwen2.5-coder-1.5b-instruct-q5_k_m.gguf"
+
+# Context length of the model
+chat_model_n_ctx: 32768
+
+# Number of layers to offload to GPU. If -1, all layers are offloaded.
+chat_model_n_gpu_layers: -1
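As a quick illustration of the override order described in the comments above, a minimal sketch (not part of the commit) of an environment variable taking precedence over the vllm URL in config.yaml; the host name "gpu-box" is made up:

    import os

    from codegate.config import Config

    # Assumption for illustration: a config.yaml with the provider_urls
    # block shown above sits in the working directory.
    os.environ["CODEGATE_PROVIDER_VLLM_URL"] = "http://gpu-box:8000"

    cfg = Config.load(config_path="config.yaml")
    print(cfg.provider_urls["vllm"])  # expected: http://gpu-box:8000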

src/codegate/cli.py

Lines changed: 33 additions & 1 deletion
@@ -2,7 +2,7 @@
 
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Dict, Optional
 
 import click
 
@@ -88,17 +88,47 @@ def show_prompts(prompts: Optional[Path]) -> None:
     default=None,
     help="Path to YAML prompts file",
 )
+@click.option(
+    "--vllm-url",
+    type=str,
+    default=None,
+    help="vLLM provider URL (default: http://localhost:8000/v1)",
+)
+@click.option(
+    "--openai-url",
+    type=str,
+    default=None,
+    help="OpenAI provider URL (default: https://api.openai.com/v1)",
+)
+@click.option(
+    "--anthropic-url",
+    type=str,
+    default=None,
+    help="Anthropic provider URL (default: https://api.anthropic.com/v1)",
+)
 def serve(
     port: Optional[int],
     host: Optional[str],
     log_level: Optional[str],
     log_format: Optional[str],
    config: Optional[Path],
     prompts: Optional[Path],
+    vllm_url: Optional[str],
+    openai_url: Optional[str],
+    anthropic_url: Optional[str],
 ) -> None:
     """Start the codegate server."""
     logger = None
     try:
+        # Create provider URLs dict from CLI options
+        cli_provider_urls: Dict[str, str] = {}
+        if vllm_url:
+            cli_provider_urls["vllm"] = vllm_url
+        if openai_url:
+            cli_provider_urls["openai"] = openai_url
+        if anthropic_url:
+            cli_provider_urls["anthropic"] = anthropic_url
+
         # Load configuration with priority resolution
         cfg = Config.load(
             config_path=config,
@@ -107,6 +137,7 @@ def serve(
             cli_host=host,
             cli_log_level=log_level,
             cli_log_format=log_format,
+            cli_provider_urls=cli_provider_urls,
         )
 
         logger = setup_logging(cfg.log_level, cfg.log_format)
@@ -118,6 +149,7 @@ def serve(
                 "log_level": cfg.log_level.value,
                 "log_format": cfg.log_format.value,
                 "prompts_loaded": len(cfg.prompts.prompts),
+                "provider_urls": cfg.provider_urls,
             },
         )
 
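A small sketch (assumed usage, not part of the commit) of exercising the new flags through click's test runner; in a shell this would correspond to something like "codegate serve --vllm-url http://localhost:8000", assuming "codegate" is the installed entry point:

    from click.testing import CliRunner

    from codegate.cli import serve

    runner = CliRunner()
    # --help exits before the server starts, so this only verifies that the
    # new provider-URL options are registered on the serve command.
    result = runner.invoke(serve, ["--help"])
    assert "--vllm-url" in result.output
    assert "--openai-url" in result.output
    assert "--anthropic-url" in result.output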

src/codegate/config.py

Lines changed: 31 additions & 4 deletions
@@ -3,7 +3,7 @@
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 import yaml
 
@@ -32,6 +32,15 @@ class Config:
     chat_model_n_ctx: int = 32768
     chat_model_n_gpu_layers: int = -1
 
+    # Provider URLs with defaults
+    provider_urls: Dict[str, str] = field(
+        default_factory=lambda: {
+            "openai": "https://api.openai.com/v1",
+            "anthropic": "https://api.anthropic.com/v1",
+            "vllm": "http://localhost:8000",  # Base URL without /v1 path
+        }
+    )
+
     def __post_init__(self) -> None:
         """Validate configuration after initialization."""
         if not isinstance(self.port, int) or not (1 <= self.port <= 65535):
@@ -95,19 +104,23 @@ def from_file(cls, config_path: Union[str, Path]) -> "Config":
                    prompts_path = Path(config_path).parent / prompts_path
                prompts_config = PromptConfig.from_file(prompts_path)
 
+            # Get provider URLs from config
+            provider_urls = cls.provider_urls.copy()
+            if "provider_urls" in config_data:
+                provider_urls.update(config_data.pop("provider_urls"))
+
            return cls(
                port=config_data.get("port", cls.port),
                host=config_data.get("host", cls.host),
                log_level=config_data.get("log_level", cls.log_level.value),
                log_format=config_data.get("log_format", cls.log_format.value),
                model_base_path=config_data.get("chat_model_path", cls.model_base_path),
-                chat_model_n_ctx=config_data.get(
-                    "chat_model_n_ctx", cls.chat_model_n_ctx
-                ),
+                chat_model_n_ctx=config_data.get("chat_model_n_ctx", cls.chat_model_n_ctx),
                chat_model_n_gpu_layers=config_data.get(
                    "chat_model_n_gpu_layers", cls.chat_model_n_gpu_layers
                ),
                prompts=prompts_config,
+                provider_urls=provider_urls,
            )
        except yaml.YAMLError as e:
            raise ConfigurationError(f"Failed to parse config file: {e}")
@@ -138,6 +151,12 @@ def from_env(cls) -> "Config":
                    os.environ["CODEGATE_PROMPTS_FILE"]
                )  # noqa: E501
 
+            # Load provider URLs from environment variables
+            for provider in config.provider_urls.keys():
+                env_var = f"CODEGATE_PROVIDER_{provider.upper()}_URL"
+                if env_var in os.environ:
+                    config.provider_urls[provider] = os.environ[env_var]
+
            return config
        except ValueError as e:
            raise ConfigurationError(f"Invalid environment variable value: {e}")
@@ -151,6 +170,7 @@ def load(
        cli_host: Optional[str] = None,
        cli_log_level: Optional[str] = None,
        cli_log_format: Optional[str] = None,
+        cli_provider_urls: Optional[Dict[str, str]] = None,
    ) -> "Config":
        """Load configuration with priority resolution.
 
@@ -167,6 +187,7 @@ def load(
            cli_host: Optional CLI host override
            cli_log_level: Optional CLI log level override
            cli_log_format: Optional CLI log format override
+            cli_provider_urls: Optional dict of provider URLs from CLI
 
        Returns:
            Config: Resolved configuration
@@ -198,6 +219,10 @@ def load(
        if "CODEGATE_PROMPTS_FILE" in os.environ:
            config.prompts = env_config.prompts
 
+        # Override provider URLs from environment
+        for provider, url in env_config.provider_urls.items():
+            config.provider_urls[provider] = url
+
        # Override with CLI arguments
        if cli_port is not None:
            config.port = cli_port
@@ -209,6 +234,8 @@ def load(
            config.log_format = LogFormat(cli_log_format)
        if prompts_path is not None:
            config.prompts = PromptConfig.from_file(prompts_path)
+        if cli_provider_urls is not None:
+            config.provider_urls.update(cli_provider_urls)
 
        # Set the __config class attribute
        Config.__config = config
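In Config.load, CLI-supplied provider URLs are applied last, after the environment overrides. A minimal sketch of that precedence (assumed usage; the host names are made up):

    import os

    from codegate.config import Config

    # Environment beats the built-in default for the vllm URL...
    os.environ["CODEGATE_PROVIDER_VLLM_URL"] = "http://env-host:8000"
    cfg = Config.load(config_path=None)
    assert cfg.provider_urls["vllm"] == "http://env-host:8000"

    # ...and a CLI-supplied URL (cli_provider_urls, built in serve()) beats
    # the environment value.
    cfg = Config.load(config_path=None, cli_provider_urls={"vllm": "http://cli-host:8000"})
    assert cfg.provider_urls["vllm"] == "http://cli-host:8000"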

src/codegate/pipeline/fim/secret_analyzer.py

Lines changed: 2 additions & 4 deletions
@@ -28,9 +28,7 @@ def name(self) -> str:
         return "fim-secret-analyzer"
 
     async def process(
-        self,
-        request: ChatCompletionRequest,
-        context: PipelineContext
+        self, request: ChatCompletionRequest, context: PipelineContext
     ) -> PipelineResult:
         # We should call here Secrets Blocking module to see if the request messages contain secrets
         # messages_contain_secrets = [analyze_msg_secrets(msg) for msg in request.messages]
@@ -39,7 +37,7 @@ async def process(
         # For the moment to test shortcutting just treat all messages as if they contain secrets
         message_with_secrets = False
         if message_with_secrets:
-            logger.info('Blocking message with secrets.')
+            logger.info("Blocking message with secrets.")
             return PipelineResult(
                 response=PipelineResponse(
                     step_name=self.name,

src/codegate/providers/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -2,10 +2,12 @@
 from codegate.providers.base import BaseProvider
 from codegate.providers.openai.provider import OpenAIProvider
 from codegate.providers.registry import ProviderRegistry
+from codegate.providers.vllm.provider import VLLMProvider
 
 __all__ = [
     "BaseProvider",
     "ProviderRegistry",
     "OpenAIProvider",
     "AnthropicProvider",
+    "VLLMProvider",
 ]
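The new provider is re-exported from the package root. A minimal sketch (assumed, since vllm/provider.py is not shown in this excerpt) of constructing it the same way as the other providers:

    from codegate.providers import VLLMProvider

    # Assumption: VLLMProvider mirrors AnthropicProvider's constructor, where
    # the pipeline processors are optional and default to None.
    provider = VLLMProvider()
    print(provider.provider_route_name)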

src/codegate/providers/anthropic/completion_handler.py

Lines changed: 3 additions & 3 deletions
@@ -27,7 +27,7 @@ async def execute_completion(
         For more details, refer to the
         [LiteLLM Documentation](https://docs.litellm.ai/docs/providers/anthropic).
         """
-        model_in_request = request['model']
-        if not model_in_request.startswith('anthropic/'):
-            request['model'] = f'anthropic/{model_in_request}'
+        model_in_request = request["model"]
+        if not model_in_request.startswith("anthropic/"):
+            request["model"] = f"anthropic/{model_in_request}"
         return await super().execute_completion(request, api_key, stream)
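Only the quote style changes in this hunk; for context, the effect of the prefixing logic is to rewrite a bare model name so LiteLLM routes it to Anthropic. A standalone illustration (the model name is just an example):

    # Mirrors the startswith/prefix logic shown above; illustrative only.
    request = {"model": "claude-3-5-sonnet"}
    if not request["model"].startswith("anthropic/"):
        request["model"] = f"anthropic/{request['model']}"
    assert request["model"] == "anthropic/claude-3-5-sonnet"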

src/codegate/providers/anthropic/provider.py

Lines changed: 5 additions & 5 deletions
@@ -11,17 +11,17 @@
 
 class AnthropicProvider(BaseProvider):
     def __init__(
-            self,
-            pipeline_processor: Optional[SequentialPipelineProcessor] = None,
-            fim_pipeline_processor: Optional[SequentialPipelineProcessor] = None
-    ):
+        self,
+        pipeline_processor: Optional[SequentialPipelineProcessor] = None,
+        fim_pipeline_processor: Optional[SequentialPipelineProcessor] = None,
+    ):
         completion_handler = AnthropicCompletion(stream_generator=anthropic_stream_generator)
         super().__init__(
             AnthropicInputNormalizer(),
             AnthropicOutputNormalizer(),
             completion_handler,
             pipeline_processor,
-            fim_pipeline_processor
+            fim_pipeline_processor,
         )
 
     @property

src/codegate/providers/base.py

Lines changed: 9 additions & 11 deletions
@@ -50,17 +50,15 @@ def provider_route_name(self) -> str:
         pass
 
     async def _run_input_pipeline(
-        self,
-        normalized_request: ChatCompletionRequest,
-        is_fim_request: bool
+        self, normalized_request: ChatCompletionRequest, is_fim_request: bool
     ) -> PipelineResult:
         # Decide which pipeline processor to use
         if is_fim_request:
             pipeline_processor = self._fim_pipelin_processor
-            logger.info('FIM pipeline selected for execution.')
+            logger.info("FIM pipeline selected for execution.")
         else:
             pipeline_processor = self._pipeline_processor
-            logger.info('Chat completion pipeline selected for execution.')
+            logger.info("Chat completion pipeline selected for execution.")
         if pipeline_processor is None:
             return PipelineResult(request=normalized_request)
 
@@ -92,21 +90,21 @@ def _is_fim_request_body(self, data: Dict) -> bool:
         Determine from the raw incoming data if it's a FIM request.
         Used by: OpenAI and Anthropic
         """
-        messages = data.get('messages', [])
+        messages = data.get("messages", [])
         if not messages:
             return False
 
-        first_message_content = messages[0].get('content')
+        first_message_content = messages[0].get("content")
         if first_message_content is None:
             return False
 
-        fim_stop_sequences = ['</COMPLETION>', '<COMPLETION>', '</QUERY>', '<QUERY>']
+        fim_stop_sequences = ["</COMPLETION>", "<COMPLETION>", "</QUERY>", "<QUERY>"]
         if isinstance(first_message_content, str):
             msg_prompt = first_message_content
         elif isinstance(first_message_content, list):
-            msg_prompt = first_message_content[0].get('text', '')
+            msg_prompt = first_message_content[0].get("text", "")
         else:
-            logger.warning(f'Could not determine if message was FIM from data: {data}')
+            logger.warning(f"Could not determine if message was FIM from data: {data}")
             return False
         return all([stop_sequence in msg_prompt for stop_sequence in fim_stop_sequences])
 
@@ -121,7 +119,7 @@ def _is_fim_request(self, request: Request, data: Dict) -> bool:
         return self._is_fim_request_body(data)
 
     async def complete(
-            self, data: Dict, api_key: Optional[str], is_fim_request: bool
+        self, data: Dict, api_key: Optional[str], is_fim_request: bool
     ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
         """
         Main completion flow with pipeline integration
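A standalone illustration of the heuristic in _is_fim_request_body (the payloads and the helper below are made up for this example): a request counts as fill-in-the-middle only when the first message contains every one of the completion/query markers.

    # Mirrors the check in _is_fim_request_body; not an import of the real method.
    fim_stop_sequences = ["</COMPLETION>", "<COMPLETION>", "</QUERY>", "<QUERY>"]

    fim_body = {
        "messages": [
            {
                "role": "user",
                "content": "<QUERY>def add(a, b):</QUERY><COMPLETION>    return a + b</COMPLETION>",
            }
        ]
    }
    chat_body = {"messages": [{"role": "user", "content": "Explain this function."}]}

    def looks_like_fim(data: dict) -> bool:
        prompt = data["messages"][0]["content"]
        return all(seq in prompt for seq in fim_stop_sequences)

    assert looks_like_fim(fim_body) is True
    assert looks_like_fim(chat_body) is False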

src/codegate/providers/llamacpp/completion_handler.py

Lines changed: 14 additions & 10 deletions
@@ -1,5 +1,5 @@
-import json
 import asyncio
+import json
 from typing import Any, AsyncIterator, Iterator, Optional, Union
 
 from fastapi.responses import StreamingResponse
@@ -39,16 +39,20 @@
         """
         model_path = f"{Config.get_config().model_base_path}/{request['model']}.gguf"
 
-        if 'prompt' in request:
-            response = await self.inference_engine.complete(model_path,
-                                                            Config.get_config().chat_model_n_ctx,
-                                                            Config.get_config().chat_model_n_gpu_layers,
-                                                            **request)
+        if "prompt" in request:
+            response = await self.inference_engine.complete(
+                model_path,
+                Config.get_config().chat_model_n_ctx,
+                Config.get_config().chat_model_n_gpu_layers,
+                **request,
+            )
         else:
-            response = await self.inference_engine.chat(model_path,
-                                                        Config.get_config().chat_model_n_ctx,
-                                                        Config.get_config().chat_model_n_gpu_layers,
-                                                        **request)
+            response = await self.inference_engine.chat(
+                model_path,
+                Config.get_config().chat_model_n_ctx,
+                Config.get_config().chat_model_n_gpu_layers,
+                **request,
+            )
         return response
 
     def create_streaming_response(self, stream: Iterator[Any]) -> StreamingResponse:
