Skip to content

Commit 033b6e5

Browse files
committed
Merge branch 'main' of https://github.com/stacklok/codegate
2 parents 36e02f1 + 6184edd commit 033b6e5

File tree

25 files changed

+712
-128
lines changed

25 files changed

+712
-128
lines changed

.github/workflows/image-build.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ jobs:
2020
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4
2121
- name: Set up Docker Buildx
2222
uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3
23+
# Resolve the newest commit SHA of the codegate-ui repository and export it as
# LATEST_COMMIT_SHA for later steps (it is passed to the image build as a build arg).
# TLS certificate verification is intentionally left enabled (no `-k`): the request
# carries a bearer token and must not be sent to an unverified endpoint.
- name: Fetch latest FE commit SHA
  id: fetch_commit_fe_sha
  run: |
    echo "LATEST_COMMIT_SHA=$(curl -LSs 'https://api.github.com/repos/stacklok/codegate-ui/commits?per_page=1' -H 'Authorization: Bearer ${{ secrets.GH_CI_TOKEN }}' | jq -r '.[0].sha')" >> "$GITHUB_ENV"
2327
- name: Test build on x86
2428
id: docker_build
2529
uses: docker/build-push-action@48aba3b46d1b1fec4febb7c5d0c644b249a11355 # v5
@@ -30,4 +34,8 @@ jobs:
3034
push: false # Only attempt to build, to verify the Dockerfile is working
3135
load: true
3236
cache-from: type=gha
33-
cache-to: type=gha,mode=max
37+
cache-to: type=gha,mode=max
38+
secrets: |
39+
gh_token=${{ secrets.GH_CI_TOKEN }}
40+
build-args: |
41+
LATEST_COMMIT_SHA=${{ env.LATEST_COMMIT_SHA }}

.github/workflows/image-publish.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ jobs:
6161
name_is_regexp: true
6262
skip_unpack: false
6363
if_no_artifact_found: ignore
64+
# Resolve the newest commit SHA of the codegate-ui repository and export it as
# LATEST_COMMIT_SHA for later steps (it is passed to the image build as a build arg).
# TLS certificate verification is intentionally left enabled (no `-k`): the request
# carries a bearer token and must not be sent to an unverified endpoint.
- name: Fetch latest FE commit SHA
  id: fetch_commit_fe_sha
  run: |
    echo "LATEST_COMMIT_SHA=$(curl -LSs 'https://api.github.com/repos/stacklok/codegate-ui/commits?per_page=1' -H 'Authorization: Bearer ${{ secrets.GH_CI_TOKEN }}' | jq -r '.[0].sha')" >> "$GITHUB_ENV"
6468
- name: Rename to accomodate to image
6569
run: mv ./backup_weaviate ./weaviate_backup
6670
- name: Build image
@@ -76,3 +80,7 @@ jobs:
7680
labels: ${{ steps.docker-metadata.outputs.labels }}
7781
cache-from: type=gha
7882
cache-to: type=gha,mode=max
83+
secrets: |
84+
gh_token=${{ secrets.GH_CI_TOKEN }}
85+
build-args: |
86+
LATEST_COMMIT_SHA=${{ env.LATEST_COMMIT_SHA }}

Dockerfile

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,74 @@ RUN poetry config virtualenvs.create false && \
2121
# Copy the rest of the application
2222
COPY . /app
2323

24+
# Build the webapp
FROM node:20.18-slim AS webbuilder

# Install curl for downloading the webapp from GH and unzip to extract it.
# The apt lists are removed in the same layer so they are not kept in the stage.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    unzip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/src/

# Get the latest commit sha as a build arg
# This is needed otherwise Docker will cache the download step. With this workaround
# we can force Docker to re-run the download step if the latest commit sha changes.
# --build-arg LATEST_COMMIT_SHA=$(curl \
#     -LSs "https://api.github.com/repos/stacklok/codegate-ui/commits?per_page=1" \
#     -H "Authorization: Bearer $GH_CI_TOKEN" | jq -r '.[0].sha')
ARG LATEST_COMMIT_SHA=LATEST
RUN echo "Latest FE commit: $LATEST_COMMIT_SHA"

# Download the webapp from GH, extract it and rename the extracted folder.
# -f makes curl exit non-zero on an HTTP error (e.g. a bad or missing token) instead of
#    saving the error body as main.zip and failing later at unzip
# -L to follow redirects
# The token is read from a BuildKit secret mount, so it is not stored in any layer.
# Download, extract and cleanup share one RUN so the archive is not kept in a layer.
RUN --mount=type=secret,id=gh_token \
    LATEST_COMMIT_SHA=${LATEST_COMMIT_SHA} \
    curl -fL -o main.zip "https://api.github.com/repos/stacklok/codegate-ui/zipball/main" \
    -H "Authorization: Bearer $(cat /run/secrets/gh_token)" \
    && unzip main.zip \
    && rm main.zip \
    && mv *codegate-ui* webapp

WORKDIR /usr/src/webapp

# Install the webapp dependencies and build it
# NOTE(review): if the UI repository commits a lockfile, `npm ci` would give
# reproducible installs - confirm before switching.
RUN npm install
RUN npm run build
61+
2462
# Runtime stage: Create the final lightweight image
2563
FROM python:3.12-slim AS runtime
2664

2765
# Install runtime system dependencies
2866
RUN apt-get update && apt-get install -y --no-install-recommends \
2967
libgomp1 \
68+
nginx \
3069
&& rm -rf /var/lib/apt/lists/*
3170

32-
# Create a non-root user and switch to it
71+
# Create a non-root user
3372
RUN useradd -m -u 1000 -r codegate
73+
74+
# Set permissions for user codegate to run nginx
75+
RUN chown -R codegate /var/lib/nginx && \
76+
chown -R codegate /var/log/nginx && \
77+
chown -R codegate /run
78+
79+
# Switch to codegate user
3480
USER codegate
3581
WORKDIR /app
3682

3783
# Copy necessary artifacts from the builder stage
3884
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
3985
COPY --from=builder /app /app
4086

87+
# Copy necessary artifacts from the webbuilder stage
88+
COPY --from=webbuilder /usr/src/webapp/dist /var/www/html
89+
# Expose nginx
90+
EXPOSE 80
91+
4192
# Set the PYTHONPATH environment variable
4293
ENV PYTHONPATH=/app/src
4394

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Codegate
1+
![image](https://github.com/user-attachments/assets/ab37063d-039d-4857-be88-231047a7b282)
2+
23

34
[![CI](https://github.com/stacklok/codegate/actions/workflows/ci.yml/badge.svg)](https://github.com/stacklok/codegate/actions/workflows/ci.yml)
45

config.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ log_level: "INFO" # One of: ERROR, WARNING, INFO, DEBUG
1919
##
2020

2121
# Model to use for chatting
22-
chat_model_path: "./models"
22+
model_base_path: "./models"
2323

2424
# Context length of the model
2525
chat_model_n_ctx: 32768
2626

2727
# Number of layers to offload to GPU. If -1, all layers are offloaded.
2828
chat_model_n_gpu_layers: -1
2929

30+
# Embedding model
31+
embedding_model: "all-minilm-L6-v2-q5_k_m.gguf"

prompts/default.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ lookup_packages: |
2424
Your job is to extract any software packages from user's request. A package is a named entity.
2525
You MUST RESPOND with a list of packages in JSON FORMAT: {"packages": [pkg1, pkg2, ...]}.
2626
27+
secrets_redacted: |
28+
The files in the context contain sensitive information that has been redacted. Do not warn the user
29+
about any tokens, passwords or similar sensitive information in the context whose value begins with
30+
the string "REDACTED".
31+
2732
# Security-focused prompts
2833
security_audit: "You are a security expert conducting a thorough code review. Identify potential security vulnerabilities, suggest improvements, and explain security best practices."
2934

scripts/entrypoint.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ else
99
echo "No backup found at $1/$2. Skipping restore."
1010
fi
1111

12-
# Step 2: Start the main application (serve)
12+
# Step 2: Start the Nginx server with FE
13+
echo "Starting the dashboard.. "
14+
exec nginx -g 'daemon off;' &
15+
16+
# Step 3: Start the main application (serve)
1317
echo "Starting the application..."
14-
exec python -m src.codegate.cli serve --port 8989 --host 0.0.0.0
18+
exec python -m src.codegate.cli serve --port 8989 --host 0.0.0.0 --vllm-url https://inference.codegate.ai

scripts/import_packages.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44
import shutil
55

6-
76
import weaviate
87
from weaviate.classes.config import DataType, Property
98
from weaviate.embedded import EmbeddedOptions
@@ -17,10 +16,12 @@ class PackageImporter:
1716
def __init__(self):
1817
self.client = weaviate.WeaviateClient(
1918
embedded_options=EmbeddedOptions(
20-
persistence_data_path="./weaviate_data", grpc_port=50052,
21-
additional_env_vars={"ENABLE_MODULES": "backup-filesystem",
22-
"BACKUP_FILESYSTEM_PATH": os.getenv("BACKUP_FILESYSTEM_PATH",
23-
"/tmp")}
19+
persistence_data_path="./weaviate_data",
20+
grpc_port=50052,
21+
additional_env_vars={
22+
"ENABLE_MODULES": "backup-filesystem",
23+
"BACKUP_FILESYSTEM_PATH": os.getenv("BACKUP_FILESYSTEM_PATH", "/tmp"),
24+
},
2425
)
2526
)
2627
self.json_files = [
@@ -35,21 +36,28 @@ def __init__(self):
3536
def restore_backup(self):
3637
if os.getenv("BACKUP_FOLDER"):
3738
try:
38-
self.client.backup.restore(backup_id=os.getenv("BACKUP_FOLDER"),
39-
backend="filesystem", wait_for_completion=True)
39+
self.client.backup.restore(
40+
backup_id=os.getenv("BACKUP_FOLDER"),
41+
backend="filesystem",
42+
wait_for_completion=True,
43+
)
4044
except Exception as e:
4145
print(f"Failed to restore backup: {e}")
4246

4347
def take_backup(self):
4448
# if backup folder exists, remove it
45-
backup_path = os.path.join(os.getenv("BACKUP_FILESYSTEM_PATH", "/tmp"),
46-
os.getenv("BACKUP_TARGET_ID", "backup"))
49+
backup_path = os.path.join(
50+
os.getenv("BACKUP_FILESYSTEM_PATH", "/tmp"), os.getenv("BACKUP_TARGET_ID", "backup")
51+
)
4752
if os.path.exists(backup_path):
4853
shutil.rmtree(backup_path)
4954

5055
#  take a backup of the data
51-
self.client.backup.create(backup_id=os.getenv("BACKUP_TARGET_ID", "backup"),
52-
backend="filesystem", wait_for_completion=True)
56+
self.client.backup.create(
57+
backup_id=os.getenv("BACKUP_TARGET_ID", "backup"),
58+
backend="filesystem",
59+
wait_for_completion=True,
60+
)
5361

5462
def setup_schema(self):
5563
if not self.client.collections.exists("Package"):

src/codegate/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
from typing import Dict, Optional
66

77
import click
8-
from src.codegate.storage.utils import restore_storage_backup
98
import structlog
109

1110
from codegate.codegate_logging import LogFormat, LogLevel, setup_logging
1211
from codegate.config import Config, ConfigurationError
1312
from codegate.db.connection import init_db_sync
1413
from codegate.server import init_app
14+
from codegate.storage.utils import restore_storage_backup
1515

1616

1717
def validate_port(ctx: click.Context, param: click.Parameter, value: int) -> int:

src/codegate/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class Config:
4040
model_base_path: str = "./models"
4141
chat_model_n_ctx: int = 32768
4242
chat_model_n_gpu_layers: int = -1
43+
embedding_model: str = "all-minilm-L6-v2-q5_k_m.gguf"
4344

4445
# Provider URLs with defaults
4546
provider_urls: Dict[str, str] = field(default_factory=lambda: DEFAULT_PROVIDER_URLS.copy())
@@ -117,11 +118,12 @@ def from_file(cls, config_path: Union[str, Path]) -> "Config":
117118
host=config_data.get("host", cls.host),
118119
log_level=config_data.get("log_level", cls.log_level.value),
119120
log_format=config_data.get("log_format", cls.log_format.value),
120-
model_base_path=config_data.get("chat_model_path", cls.model_base_path),
121+
model_base_path=config_data.get("model_base_path", cls.model_base_path),
121122
chat_model_n_ctx=config_data.get("chat_model_n_ctx", cls.chat_model_n_ctx),
122123
chat_model_n_gpu_layers=config_data.get(
123124
"chat_model_n_gpu_layers", cls.chat_model_n_gpu_layers
124125
),
126+
embedding_model=config_data.get("embedding_model", cls.embedding_model),
125127
prompts=prompts_config,
126128
provider_urls=provider_urls,
127129
)

src/codegate/llm_utils/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from codegate.llm_utils.extractor import PackageExtractor
2+
from codegate.llm_utils.llmclient import LLMClient
3+
4+
__all__ = ["LLMClient", "PackageExtractor"]

src/codegate/llm_utils/extractor.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import List, Optional
2+
3+
import structlog
4+
5+
from codegate.config import Config
6+
from codegate.llm_utils.llmclient import LLMClient
7+
from codegate.storage import StorageEngine
8+
9+
logger = structlog.get_logger("codegate")
10+
11+
12+
class PackageExtractor:
    """
    Utility class to extract package names from code or queries.
    """

    def __init__(self):
        # Storage handle created per instance; extract_packages itself is static
        # and does not use it.
        self.storage_engine = StorageEngine()

    @staticmethod
    async def extract_packages(
        content: str,
        provider: str,
        model: Optional[str] = None,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> List[str]:
        """Extract package names from the given content.

        The `lookup_packages` prompt from the active configuration is used as the
        system prompt, and the request is delegated to `LLMClient.complete`.

        Args:
            content: Text to extract package names from.
            provider: Provider identifier, forwarded to `LLMClient.complete`.
            model: Optional model name, forwarded to `LLMClient.complete`.
            base_url: Optional base URL, forwarded to `LLMClient.complete`.
            api_key: Optional API key, forwarded to `LLMClient.complete`.

        Returns:
            The extracted packages; an empty list when the completion yields
            nothing usable.
        """
        system_prompt = Config.get_config().prompts.lookup_packages

        result = await LLMClient.complete(
            content=content,
            system_prompt=system_prompt,
            provider=provider,
            model=model,
            api_key=api_key,
            base_url=base_url,
        )

        # Handle both formats: {"packages": [...]} and direct list [...].
        # An empty/None result is treated as "no packages" instead of raising
        # AttributeError on `.get` (presumably possible when the completion
        # fails or returns nothing parseable - verify against LLMClient).
        if not result:
            packages = []
        elif isinstance(result, list):
            packages = result
        else:
            packages = result.get("packages", [])

        logger.info(f"Extracted packages: {packages}")
        return packages

0 commit comments

Comments
 (0)