Commit cd9a580

fix: Logging module always producing JSON

Right now, even if we specify that we want TEXT logging output, Codegate uses JSON anyway. This fixes it. It also introduces `structlog` as a convenient, ready-made formatter instead of hand-rolling our own formatters.
1 parent dc988a3 commit cd9a580

11 files changed: +119 -221 lines changed
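As a quick illustration of the intended behavior, here is a minimal sketch. It assumes the `setup_logging()` signature and the `LogFormat` enum exactly as they appear in the diffs below; the log call itself is illustrative.

import structlog

from codegate.codegate_logging import LogFormat, setup_logging

# Before this commit the TEXT choice was ignored and output was always JSON;
# afterwards the chosen renderer is actually used.
setup_logging(log_format=LogFormat.TEXT)

# setup_logging() no longer returns a logger; loggers now come from structlog.
logger = structlog.get_logger("codegate")
logger.info("Starting server")  # rendered by ConsoleRenderer, not JSONRenderer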

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ weaviate = ">=0.1.2"
 weaviate-client = ">=4.9.3"
 torch = ">=2.5.1"
 transformers = ">=4.46.3"
-
+structlog = ">=24.4.0"
 litellm = "^1.52.16"
 llama_cpp_python = ">=0.3.2"

src/codegate/cli.py

Lines changed: 3 additions & 1 deletion
@@ -5,6 +5,7 @@
 from typing import Dict, Optional
 
 import click
+import structlog
 
 from codegate.codegate_logging import LogFormat, LogLevel, setup_logging
 from codegate.config import Config, ConfigurationError
@@ -140,7 +141,8 @@ def serve(
         cli_provider_urls=cli_provider_urls,
     )
 
-    logger = setup_logging(cfg.log_level, cfg.log_format)
+    setup_logging(cfg.log_level, cfg.log_format)
+    logger = structlog.get_logger("codegate")
     logger.info(
         "Starting server",
         extra={

src/codegate/codegate_logging.py

Lines changed: 45 additions & 127 deletions
@@ -1,9 +1,9 @@
-import datetime
-import json
 import logging
 import sys
 from enum import Enum
-from typing import Any, Optional
+from typing import Optional
+
+import structlog
 
 
 class LogLevel(str, Enum):
@@ -46,123 +46,6 @@ def _missing_(cls, value: str) -> Optional["LogFormat"]:
         )
 
 
-class JSONFormatter(logging.Formatter):
-    """Custom formatter that outputs log records as JSON."""
-
-    def __init__(self) -> None:
-        """Initialize the JSON formatter."""
-        super().__init__()
-        self.default_time_format = "%Y-%m-%dT%H:%M:%S"
-        self.default_msec_format = "%s.%03dZ"
-
-    def format(self, record: logging.LogRecord) -> str:
-        """Format the log record as a JSON string.
-
-        Args:
-            record: The log record to format
-
-        Returns:
-            str: JSON formatted log entry
-        """
-        # Create the base log entry
-        log_entry: dict[str, Any] = {
-            "timestamp": self.formatTime(record, self.default_time_format),
-            "level": record.levelname,
-            "module": record.module,
-            "message": record.getMessage(),
-            "extra": {},
-        }
-
-        # Add extra fields from the record
-        extra_attrs = {}
-        for key, value in record.__dict__.items():
-            if key not in {
-                "args",
-                "asctime",
-                "created",
-                "exc_info",
-                "exc_text",
-                "filename",
-                "funcName",
-                "levelname",
-                "levelno",
-                "lineno",
-                "module",
-                "msecs",
-                "msg",
-                "name",
-                "pathname",
-                "process",
-                "processName",
-                "relativeCreated",
-                "stack_info",
-                "thread",
-                "threadName",
-                "extra",
-            }:
-                extra_attrs[key] = value
-
-        # Handle the explicit extra parameter if present
-        if hasattr(record, "extra"):
-            try:
-                if isinstance(record.extra, dict):
-                    extra_attrs.update(record.extra)
-            except Exception:
-                extra_attrs["unserializable_extra"] = str(record.extra)
-
-        # Add all extra attributes to the log entry
-        if extra_attrs:
-            try:
-                json.dumps(extra_attrs)  # Test if serializable
-                log_entry["extra"] = extra_attrs
-            except (TypeError, ValueError):
-                # If serialization fails, convert values to strings
-                serializable_extra = {}
-                for key, value in extra_attrs.items():
-                    try:
-                        json.dumps({key: value})  # Test individual value
-                        serializable_extra[key] = value
-                    except (TypeError, ValueError):
-                        serializable_extra[key] = str(value)
-                log_entry["extra"] = serializable_extra
-
-        # Handle exception info if present
-        if record.exc_info:
-            log_entry["extra"]["exception"] = self.formatException(record.exc_info)
-
-        # Handle stack info if present
-        if record.stack_info:
-            log_entry["extra"]["stack_info"] = self.formatStack(record.stack_info)
-
-        return json.dumps(log_entry)
-
-
-class TextFormatter(logging.Formatter):
-    """Standard text formatter with consistent timestamp format."""
-
-    def __init__(self) -> None:
-        """Initialize the text formatter."""
-        super().__init__(
-            fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-            datefmt="%Y-%m-%dT%H:%M:%S.%03dZ",
-        )
-
-    def formatTime(  # noqa: N802
-        self, record: logging.LogRecord, datefmt: Optional[str] = None
-    ) -> str:
-        """Format the time with millisecond precision.
-
-        Args:
-            record: The log record
-            datefmt: The date format string (ignored as we use a fixed format)
-
-        Returns:
-            str: Formatted timestamp
-        """
-        ct = datetime.datetime.fromtimestamp(record.created, datetime.UTC)
-        return ct.strftime(self.datefmt)
-
-
 def setup_logging(
     log_level: Optional[LogLevel] = None, log_format: Optional[LogFormat] = None
 ) -> logging.Logger:
@@ -181,10 +64,47 @@ def setup_logging(
     if log_format is None:
         log_format = LogFormat.JSON
 
-    # Create formatters
-    json_formatter = JSONFormatter()
-    text_formatter = TextFormatter()
-    formatter = json_formatter if log_format == LogFormat.JSON else text_formatter
+    # The configuration was taken from structlog documentation
+    # https://www.structlog.org/en/stable/standard-library.html
+    # Specifically the section "Rendering Using structlog-based Formatters Within logging"
+
+    # Adds log level and timestamp to log entries
+    shared_processors = [
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="%Y-%m-%dT%H:%M:%S.%03dZ", utc=True),
+    ]
+    # Not sure why this is needed. I think it is a wrapper for the standard logging module.
+    # Should allow to log both with structlog and the standard logging module:
+    # import logging
+    # import structlog
+    # logging.getLogger("stdlog").info("woo")
+    # structlog.get_logger("structlog").info("amazing", events="oh yes")
+    structlog.configure(
+        processors=shared_processors
+        + [
+            # Prepare event dict for `ProcessorFormatter`.
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    # The config aboves adds the following keys to all log entries: _record & _from_structlog.
+    # remove_processors_meta removes them.
+    processors = shared_processors + [structlog.stdlib.ProcessorFormatter.remove_processors_meta]
+    # Choose the processors based on the log format
+    if log_format == LogFormat.JSON:
+        processors = processors + [
+            structlog.processors.dict_tracebacks,
+            structlog.processors.JSONRenderer(),
+        ]
+    else:
+        processors = processors + [structlog.dev.ConsoleRenderer()]
+    formatter = structlog.stdlib.ProcessorFormatter(
+        # foreign_pre_chain run ONLY on `logging` entries that do NOT originate within structlog.
+        foreign_pre_chain=shared_processors,
+        processors=processors,
+    )
 
     # Create handlers for stdout and stderr
     stdout_handler = logging.StreamHandler(sys.stdout)
@@ -208,7 +128,7 @@ def setup_logging(
     root_logger.addHandler(stderr_handler)
 
     # Create a logger for our package
-    logger = logging.getLogger("codegate")
+    logger = structlog.get_logger("codegate")
    logger.debug(
        "Logging initialized",
        extra={
@@ -217,5 +137,3 @@ def setup_logging(
             "handlers": ["stdout", "stderr"],
         },
     )
-
-    return logger
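A comment in the new setup_logging() is unsure why structlog.configure() is needed. A self-contained sketch of the same pattern (based on structlog's standard-library docs, simplified, and not Codegate's exact wiring) may make it clearer: configure() routes structlog loggers through stdlib logging, and one ProcessorFormatter then renders records from both worlds identically.

import logging
import sys

import structlog

# Shared processors decorate every event with a level and a timestamp.
shared = [
    structlog.processors.add_log_level,
    structlog.processors.TimeStamper(fmt="iso", utc=True),
]

# Route structlog loggers through stdlib logging and hand their event dicts
# to ProcessorFormatter untouched.
structlog.configure(
    processors=shared + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
    logger_factory=structlog.stdlib.LoggerFactory(),
)

# One formatter renders both kinds of records; foreign_pre_chain applies the
# shared processors only to records that did NOT come from structlog.
formatter = structlog.stdlib.ProcessorFormatter(
    foreign_pre_chain=shared,
    processors=[
        structlog.stdlib.ProcessorFormatter.remove_processors_meta,
        structlog.dev.ConsoleRenderer(),  # swap in JSONRenderer() for JSON output
    ],
)

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)

# Both lines end up formatted the same way.
logging.getLogger("stdlog").info("plain stdlib record")
structlog.get_logger("structlog").info("structlog event", answer=42)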

src/codegate/config.py

Lines changed: 3 additions & 2 deletions
@@ -5,13 +5,14 @@
 from pathlib import Path
 from typing import Dict, Optional, Union
 
+import structlog
 import yaml
 
-from codegate.codegate_logging import LogFormat, LogLevel, setup_logging
+from codegate.codegate_logging import LogFormat, LogLevel
 from codegate.exceptions import ConfigurationError
 from codegate.prompts import PromptConfig
 
-logger = setup_logging()
+logger = structlog.get_logger("codegate")
 
 # Default provider URLs
 DEFAULT_PROVIDER_URLS = {

src/codegate/inference/inference_engine.py

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,7 @@
+import structlog
 from llama_cpp import Llama
 
-from codegate.codegate_logging import setup_logging
+logger = structlog.get_logger("codegate")
 
 
 class LlamaCppInferenceEngine:
@@ -21,7 +22,6 @@ def __new__(cls):
     def __init__(self):
         if not hasattr(self, "models"):
             self.__models = {}
-            self.__logger = setup_logging()
 
     def __del__(self):
         self.__close_models()
@@ -32,7 +32,7 @@ async def __get_model(self, model_path, embedding=False, n_ctx=512, n_gpu_layers
         is loaded and added to __models and returned.
         """
         if model_path not in self.__models:
-            self.__logger.info(
+            logger.info(
                 f"Loading model from {model_path} with parameters "
                 f"n_gpu_layers={n_gpu_layers} and n_ctx={n_ctx}"
             )

src/codegate/pipeline/fim/secret_analyzer.py

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
+import structlog
 from litellm import ChatCompletionRequest
 
-from codegate.codegate_logging import setup_logging
 from codegate.pipeline.base import PipelineContext, PipelineResponse, PipelineResult, PipelineStep
 
-logger = setup_logging()
+logger = structlog.get_logger("codegate")
 
 
 class SecretAnalyzer(PipelineStep):

src/codegate/pipeline/secrets/secrets.py

Lines changed: 16 additions & 15 deletions
@@ -1,8 +1,8 @@
 import re
 
+import structlog
 from litellm import ChatCompletionRequest
 
-from codegate.codegate_logging import setup_logging
 from codegate.pipeline.base import (
     PipelineContext,
     PipelineResult,
@@ -11,6 +11,8 @@
 from codegate.pipeline.secrets.gatecrypto import CodeGateCrypto
 from codegate.pipeline.secrets.signatures import CodegateSignatures
 
+logger = structlog.get_logger("codegate")
+
 
 class CodegateSecrets(PipelineStep):
     """Pipeline step that handles secret information requests."""
@@ -21,7 +23,6 @@ def __init__(self):
         self.crypto = CodeGateCrypto()
         self._session_store = {}
         self._encrypted_to_session = {}  # Reverse lookup index
-        self.__logger = setup_logging()
 
     @property
     def name(self) -> str:
@@ -87,7 +88,7 @@ def _redeact_text(self, text: str) -> str:
         if not matches:
             return text
 
-        self.__logger.debug(f"Found {len(matches)} secrets in the user message")
+        logger.debug(f"Found {len(matches)} secrets in the user message")
 
         # Convert line positions to absolute positions and extend boundaries
         absolute_matches = []
@@ -133,7 +134,7 @@ def _redeact_text(self, text: str) -> str:
             self._encrypted_to_session[encrypted_value] = session_id
 
             # Print the session store
-            self.__logger.info(f"Session store: {self._session_store}")
+            logger.info(f"Session store: {self._session_store}")
 
             # Create the replacement string
             replacement = f"REDACTED<${encrypted_value}>"
@@ -155,12 +156,12 @@ def _redeact_text(self, text: str) -> str:
         protected_string = "".join(protected_text)
 
         # Log the findings
-        self.__logger.info("\nFound secrets:")
+        logger.info("\nFound secrets:")
         for secret in found_secrets:
-            self.__logger.info(f"\nService: {secret['service']}")
-            self.__logger.info(f"Type: {secret['type']}")
-            self.__logger.info(f"Original: {secret['original']}")
-            self.__logger.info(f"Encrypted: REDACTED<${secret['encrypted']}>")
+            logger.info(f"\nService: {secret['service']}")
+            logger.info(f"Type: {secret['type']}")
+            logger.info(f"Original: {secret['original']}")
+            logger.info(f"Encrypted: REDACTED<${secret['encrypted']}>")
 
         (f"\nProtected text:\n{protected_string}")
         return protected_string
@@ -181,7 +182,7 @@ def _get_original_value(self, encrypted_value: str) -> str:
             if session_id:
                 return self._session_store[session_id]["original"]
         except Exception as e:
-            self.__logger.error(f"Error looking up original value: {e}")
+            logger.error(f"Error looking up original value: {e}")
         return encrypted_value
 
     def get_by_session_id(self, session_id: str) -> dict | None:
@@ -197,7 +198,7 @@ def get_by_session_id(self, session_id: str) -> dict | None:
         try:
             return self._session_store.get(session_id)
         except Exception as e:
-            self.__logger.error(f"Error looking up by session ID: {e}")
+            logger.error(f"Error looking up by session ID: {e}")
             return None
 
     def _cleanup_session_store(self):
@@ -215,9 +216,9 @@ def _cleanup_session_store(self):
             self._session_store.clear()
             self._encrypted_to_session.clear()
 
-            self.__logger.info("Session stores securely wiped")
+            logger.info("Session stores securely wiped")
         except Exception as e:
-            self.__logger.error(f"Error during secure cleanup: {e}")
+            logger.error(f"Error during secure cleanup: {e}")
 
     def _unredact_text(self, protected_text: str) -> str:
         """
@@ -253,7 +254,7 @@ def _unredact_text(self, protected_text: str) -> str:
 
         # Join all parts together
         unprotected_text = "".join(result)
-        self.__logger.info(f"\nUnprotected text:\n{unprotected_text}")
+        logger.info(f"\nUnprotected text:\n{unprotected_text}")
         return unprotected_text
 
     async def process(
@@ -293,7 +294,7 @@ async def process(
 
             return PipelineResult(request=request)
         except Exception as e:
-            self.__logger.error(f"CodegateSecrets operation failed: {e}")
+            logger.error(f"CodegateSecrets operation failed: {e}")
 
         finally:
             # Clean up sensitive data