Skip to content

Commit b58e5aa

Browse files
Merge pull request stacklok#129 from stacklok/fix-logging
fix: Logging module always producing JSON
2 parents dc988a3 + f72b065 commit b58e5aa

14 files changed

+149
-229
lines changed

docs/development.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ codegate/
5151
│ ├── cli.py # Command-line interface
5252
│ ├── config.py # Configuration management
5353
│ ├── exceptions.py # Shared exceptions
54-
│ ├── logging.py # Logging setup
54+
│ ├── codegate_logging.py # Logging setup
5555
│ ├── prompts.py # Prompts management
5656
│ ├── server.py # Main server implementation
5757
│ └── providers/ # External service providers

docs/logging.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Logging System
22

3-
The logging system in Codegate (`logging.py`) provides a flexible and structured logging solution with support for both JSON and text formats.
3+
The logging system in Codegate (`codegate_logging.py`) provides a flexible and structured logging solution with support for both JSON and text formats.
44

55
## Log Routing
66

@@ -18,9 +18,9 @@ When using JSON format (default), log entries include:
1818
```json
1919
{
2020
"timestamp": "YYYY-MM-DDThh:mm:ss.mmmZ",
21-
"level": "LOG_LEVEL",
21+
"log_level": "LOG_LEVEL",
2222
"module": "MODULE_NAME",
23-
"message": "Log message",
23+
"event": "Log message",
2424
"extra": {
2525
// Additional fields as you desire
2626
}
@@ -49,9 +49,9 @@ YYYY-MM-DDThh:mm:ss.mmmZ - LEVEL - NAME - MESSAGE
4949
### Basic Logging
5050

5151
```python
52-
import logging
52+
import structlog
5353

54-
logger = logging.getLogger(__name__)
54+
logger = structlog.get_logger(__name__)
5555

5656
# Different log levels
5757
logger.info("This is an info message")

poetry.lock

Lines changed: 19 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ weaviate = ">=0.1.2"
1616
weaviate-client = ">=4.9.3"
1717
torch = ">=2.5.1"
1818
transformers = ">=4.46.3"
19-
19+
structlog = ">=24.4.0"
2020
litellm = "^1.52.16"
2121
llama_cpp_python = ">=0.3.2"
2222

src/codegate/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Dict, Optional
66

77
import click
8+
import structlog
89

910
from codegate.codegate_logging import LogFormat, LogLevel, setup_logging
1011
from codegate.config import Config, ConfigurationError
@@ -140,7 +141,8 @@ def serve(
140141
cli_provider_urls=cli_provider_urls,
141142
)
142143

143-
logger = setup_logging(cfg.log_level, cfg.log_format)
144+
setup_logging(cfg.log_level, cfg.log_format)
145+
logger = structlog.get_logger("codegate")
144146
logger.info(
145147
"Starting server",
146148
extra={

src/codegate/codegate_logging.py

Lines changed: 50 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import datetime
2-
import json
31
import logging
42
import sys
53
from enum import Enum
6-
from typing import Any, Optional
4+
from typing import Optional
5+
6+
import structlog
77

88

99
class LogLevel(str, Enum):
@@ -46,123 +46,6 @@ def _missing_(cls, value: str) -> Optional["LogFormat"]:
4646
)
4747

4848

49-
class JSONFormatter(logging.Formatter):
50-
"""Custom formatter that outputs log records as JSON."""
51-
52-
def __init__(self) -> None:
53-
"""Initialize the JSON formatter."""
54-
super().__init__()
55-
self.default_time_format = "%Y-%m-%dT%H:%M:%S"
56-
self.default_msec_format = "%s.%03dZ"
57-
58-
def format(self, record: logging.LogRecord) -> str:
59-
"""Format the log record as a JSON string.
60-
61-
Args:
62-
record: The log record to format
63-
64-
Returns:
65-
str: JSON formatted log entry
66-
"""
67-
# Create the base log entry
68-
log_entry: dict[str, Any] = {
69-
"timestamp": self.formatTime(record, self.default_time_format),
70-
"level": record.levelname,
71-
"module": record.module,
72-
"message": record.getMessage(),
73-
"extra": {},
74-
}
75-
76-
# Add extra fields from the record
77-
extra_attrs = {}
78-
for key, value in record.__dict__.items():
79-
if key not in {
80-
"args",
81-
"asctime",
82-
"created",
83-
"exc_info",
84-
"exc_text",
85-
"filename",
86-
"funcName",
87-
"levelname",
88-
"levelno",
89-
"lineno",
90-
"module",
91-
"msecs",
92-
"msg",
93-
"name",
94-
"pathname",
95-
"process",
96-
"processName",
97-
"relativeCreated",
98-
"stack_info",
99-
"thread",
100-
"threadName",
101-
"extra",
102-
}:
103-
extra_attrs[key] = value
104-
105-
# Handle the explicit extra parameter if present
106-
if hasattr(record, "extra"):
107-
try:
108-
if isinstance(record.extra, dict):
109-
extra_attrs.update(record.extra)
110-
except Exception:
111-
extra_attrs["unserializable_extra"] = str(record.extra)
112-
113-
# Add all extra attributes to the log entry
114-
if extra_attrs:
115-
try:
116-
json.dumps(extra_attrs) # Test if serializable
117-
log_entry["extra"] = extra_attrs
118-
except (TypeError, ValueError):
119-
# If serialization fails, convert values to strings
120-
serializable_extra = {}
121-
for key, value in extra_attrs.items():
122-
try:
123-
json.dumps({key: value}) # Test individual value
124-
serializable_extra[key] = value
125-
except (TypeError, ValueError):
126-
serializable_extra[key] = str(value)
127-
log_entry["extra"] = serializable_extra
128-
129-
# Handle exception info if present
130-
if record.exc_info:
131-
log_entry["extra"]["exception"] = self.formatException(record.exc_info)
132-
133-
# Handle stack info if present
134-
if record.stack_info:
135-
log_entry["extra"]["stack_info"] = self.formatStack(record.stack_info)
136-
137-
return json.dumps(log_entry)
138-
139-
140-
class TextFormatter(logging.Formatter):
141-
"""Standard text formatter with consistent timestamp format."""
142-
143-
def __init__(self) -> None:
144-
"""Initialize the text formatter."""
145-
super().__init__(
146-
fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
147-
datefmt="%Y-%m-%dT%H:%M:%S.%03dZ",
148-
)
149-
150-
def formatTime( # noqa: N802
151-
self, record: logging.LogRecord, datefmt: Optional[str] = None
152-
) -> str:
153-
"""Format the time with millisecond precision.
154-
155-
Args:
156-
record: The log record
157-
datefmt: The date format string (ignored as we use a fixed format)
158-
159-
Returns:
160-
str: Formatted timestamp
161-
"""
162-
ct = datetime.datetime.fromtimestamp(record.created, datetime.UTC)
163-
return ct.strftime(self.datefmt)
164-
165-
16649
def setup_logging(
16750
log_level: Optional[LogLevel] = None, log_format: Optional[LogFormat] = None
16851
) -> logging.Logger:
@@ -181,10 +64,52 @@ def setup_logging(
18164
if log_format is None:
18265
log_format = LogFormat.JSON
18366

184-
# Create formatters
185-
json_formatter = JSONFormatter()
186-
text_formatter = TextFormatter()
187-
formatter = json_formatter if log_format == LogFormat.JSON else text_formatter
67+
# The configuration was taken from structlog documentation
68+
# https://www.structlog.org/en/stable/standard-library.html
69+
# Specifically the section "Rendering Using structlog-based Formatters Within logging"
70+
71+
# Adds log level and timestamp to log entries
72+
shared_processors = [
73+
structlog.processors.add_log_level,
74+
structlog.processors.TimeStamper(fmt="%Y-%m-%dT%H:%M:%S.%03dZ", utc=True),
75+
structlog.processors.CallsiteParameterAdder(
76+
[
77+
structlog.processors.CallsiteParameter.MODULE,
78+
]
79+
),
80+
]
81+
# Not sure why this is needed. I think it is a wrapper for the standard logging module.
82+
# Should allow to log both with structlog and the standard logging module:
83+
# import logging
84+
# import structlog
85+
# logging.getLogger("stdlog").info("woo")
86+
# structlog.get_logger("structlog").info("amazing", events="oh yes")
87+
structlog.configure(
88+
processors=shared_processors
89+
+ [
90+
# Prepare event dict for `ProcessorFormatter`.
91+
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
92+
],
93+
logger_factory=structlog.stdlib.LoggerFactory(),
94+
cache_logger_on_first_use=True,
95+
)
96+
97+
# The config aboves adds the following keys to all log entries: _record & _from_structlog.
98+
# remove_processors_meta removes them.
99+
processors = shared_processors + [structlog.stdlib.ProcessorFormatter.remove_processors_meta]
100+
# Choose the processors based on the log format
101+
if log_format == LogFormat.JSON:
102+
processors = processors + [
103+
structlog.processors.dict_tracebacks,
104+
structlog.processors.JSONRenderer(),
105+
]
106+
else:
107+
processors = processors + [structlog.dev.ConsoleRenderer()]
108+
formatter = structlog.stdlib.ProcessorFormatter(
109+
# foreign_pre_chain run ONLY on `logging` entries that do NOT originate within structlog.
110+
foreign_pre_chain=shared_processors,
111+
processors=processors,
112+
)
188113

189114
# Create handlers for stdout and stderr
190115
stdout_handler = logging.StreamHandler(sys.stdout)
@@ -208,7 +133,7 @@ def setup_logging(
208133
root_logger.addHandler(stderr_handler)
209134

210135
# Create a logger for our package
211-
logger = logging.getLogger("codegate")
136+
logger = structlog.get_logger("codegate")
212137
logger.debug(
213138
"Logging initialized",
214139
extra={
@@ -217,5 +142,3 @@ def setup_logging(
217142
"handlers": ["stdout", "stderr"],
218143
},
219144
)
220-
221-
return logger

src/codegate/config.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@
55
from pathlib import Path
66
from typing import Dict, Optional, Union
77

8+
import structlog
89
import yaml
910

10-
from codegate.codegate_logging import LogFormat, LogLevel, setup_logging
11+
from codegate.codegate_logging import LogFormat, LogLevel
1112
from codegate.exceptions import ConfigurationError
1213
from codegate.prompts import PromptConfig
1314

14-
logger = setup_logging()
15+
logger = structlog.get_logger("codegate")
1516

1617
# Default provider URLs
1718
DEFAULT_PROVIDER_URLS = {

src/codegate/inference/inference_engine.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import structlog
12
from llama_cpp import Llama
23

3-
from codegate.codegate_logging import setup_logging
4+
logger = structlog.get_logger("codegate")
45

56

67
class LlamaCppInferenceEngine:
@@ -21,7 +22,6 @@ def __new__(cls):
2122
def __init__(self):
2223
if not hasattr(self, "models"):
2324
self.__models = {}
24-
self.__logger = setup_logging()
2525

2626
def __del__(self):
2727
self.__close_models()
@@ -32,7 +32,7 @@ async def __get_model(self, model_path, embedding=False, n_ctx=512, n_gpu_layers
3232
is loaded and added to __models and returned.
3333
"""
3434
if model_path not in self.__models:
35-
self.__logger.info(
35+
logger.info(
3636
f"Loading model from {model_path} with parameters "
3737
f"n_gpu_layers={n_gpu_layers} and n_ctx={n_ctx}"
3838
)

src/codegate/pipeline/fim/secret_analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
import structlog
12
from litellm import ChatCompletionRequest
23

3-
from codegate.codegate_logging import setup_logging
44
from codegate.pipeline.base import PipelineContext, PipelineResponse, PipelineResult, PipelineStep
55

6-
logger = setup_logging()
6+
logger = structlog.get_logger("codegate")
77

88

99
class SecretAnalyzer(PipelineStep):

0 commit comments

Comments
 (0)