Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions mssql_python/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,76 @@ def execute(self, sql, *args):
cursor = self.cursor()
cursor.execute(sql, *args)
return cursor

def add_output_converter(self, sqltype, func) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of self._output_converters for storing and managing output converters is clear and straightforward for single-threaded scenarios. However, if the Connection object is accessed or modified from multiple threads, operations like adding, removing, or clearing converters could result in race conditions or inconsistent state.

If multi-threaded access to the Connection object is expected or possible, please consider protecting accesses to self._output_converters with a threading lock (e.g., threading.Lock). This will ensure thread safety during concurrent modifications.

"""
Register an output converter function that will be called whenever a value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output converter registration API allows arbitrary Python functions to be registered and executed on data retrieved from the database. If this API is public or can be accessed by untrusted users, there is a risk of code injection and arbitrary code execution. This risk is mitigated if only trusted application developers register converters. It is important to document this behavior clearly in the API documentation and warn users not to register converters from untrusted sources.

For the safety of all users, please consider updating the documentation (and relevant docstrings) with a clear "WARNING" that registering output converters is equivalent to executing arbitrary Python code. This should include explicit guidance that only trusted code should be registered as converters, and that converter registration should never be exposed to untrusted or external input.

Example warning for documentation:
⚠️ Warning: Registering an output converter will cause the supplied Python function to be executed on every matching database value. Do not register converters from untrusted sources, as this can result in arbitrary code execution and security vulnerabilities.
Making this risk explicit will help prevent misuse and keep downstream users secure.

with the given SQL type is read from the database.

Args:
sqltype (int): The integer SQL type value to convert, which can be one of the
defined standard constants (e.g. SQL_VARCHAR) or a database-specific
value (e.g. -151 for the SQL Server 2008 geometry data type).
func (callable): The converter function which will be called with a single parameter,
the value, and should return the converted value. If the value is NULL
then the parameter passed to the function will be None, otherwise it
will be a bytes object.

Returns:
None
"""
if not hasattr(self, '_output_converters'):
self._output_converters = {}
self._output_converters[sqltype] = func
# Pass to the underlying connection if native implementation supports it
if hasattr(self._conn, 'add_output_converter'):
self._conn.add_output_converter(sqltype, func)
log('info', f"Added output converter for SQL type {sqltype}")

def get_output_converter(self, sqltype):
"""
Get the output converter function for the specified SQL type.

Args:
sqltype (int or type): The SQL type value or Python type to get the converter for

Returns:
callable or None: The converter function or None if no converter is registered
"""
if not hasattr(self, '_output_converters'):
return None
return self._output_converters.get(sqltype)

def remove_output_converter(self, sqltype):
"""
Remove the output converter function for the specified SQL type.

Args:
sqltype (int or type): The SQL type value to remove the converter for

Returns:
None
"""
if hasattr(self, '_output_converters') and sqltype in self._output_converters:
del self._output_converters[sqltype]
# Pass to the underlying connection if native implementation supports it
if hasattr(self._conn, 'remove_output_converter'):
self._conn.remove_output_converter(sqltype)
log('info', f"Removed output converter for SQL type {sqltype}")

def clear_output_converters(self) -> None:
"""
Remove all output converter functions.

Returns:
None
"""
if hasattr(self, '_output_converters'):
self._output_converters.clear()
# Pass to the underlying connection if native implementation supports it
if hasattr(self._conn, 'clear_output_converters'):
self._conn.clear_output_converters()
log('info', "Cleared all output converters")

def commit(self) -> None:
"""
Expand Down
57 changes: 56 additions & 1 deletion mssql_python/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ def __init__(self, cursor, description, values, column_map=None):
column_map: Optional pre-built column map (for optimization)
"""
self._cursor = cursor
self._values = values
self._description = description

# Apply output converters if available
if hasattr(cursor.connection, '_output_converters') and cursor.connection._output_converters:
self._values = self._apply_output_converters(values)
else:
self._values = values

# TODO: ADO task - Optimize memory usage by sharing column map across rows
# Instead of storing the full cursor_description in each Row object:
Expand All @@ -38,6 +44,55 @@ def __init__(self, cursor, description, values, column_map=None):

self._column_map = column_map

def _apply_output_converters(self, values):
"""
Apply output converters to raw values.

Args:
values: Raw values from the database

Returns:
List of converted values
"""
if not self._description:
return values

converted_values = list(values)

for i, (value, desc) in enumerate(zip(values, self._description)):
if desc is None or value is None:
continue

# Get SQL type from description
sql_type = desc[1] # type_code is at index 1 in description tuple

# Try to get a converter for this type
converter = self._cursor.connection.get_output_converter(sql_type)

# If no converter found for the SQL type but the value is a string or bytes,
# try the WVARCHAR converter as a fallback
if converter is None and isinstance(value, (str, bytes)):
from mssql_python.constants import ConstantsDDBC
converter = self._cursor.connection.get_output_converter(ConstantsDDBC.SQL_WVARCHAR.value)

# If we found a converter, apply it
if converter:
try:
# If value is already a Python type (str, int, etc.),
# we need to convert it to bytes for our converters
if isinstance(value, str):
# Encode as UTF-16LE for string values (SQL_WVARCHAR format)
value_bytes = value.encode('utf-16-le')
converted_values[i] = converter(value_bytes)
else:
converted_values[i] = converter(value)
except Exception as e:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All exceptions are currently suppressed silently. While it's important not to leak sensitive data, it would be beneficial to at least log the exception at a debug or warning level for troubleshooting purposes. This way, issues with converter functions can be diagnosed without exposing sensitive information.

except Exception: log('debug', 'Exception occurred in output converter', exc_info=True)
This will help with debugging, especially if custom converter functions are used, without leaking sensitive data.

# If conversion fails, keep the original value
# You might want to log this error
pass

return converted_values

def __getitem__(self, index):
"""Allow accessing by numeric index: row[0]"""
return self._values[index]
Expand Down
Loading