Fix many linter issues
stefan-grafberger committed Feb 24, 2024
1 parent 63d0991 commit 4aebf99
Showing 26 changed files with 104 additions and 103 deletions.
2 changes: 1 addition & 1 deletion demo/feature_overview/no_missing_embeddings.py
@@ -43,7 +43,7 @@ def required_inspections(self) -> Iterable[Inspection]:

def evaluate(self, inspection_result: InspectionResult) -> CheckResult:
"""Evaluate the check"""
- dag_node_to_missing_embeddings = dict()
+ dag_node_to_missing_embeddings = {}
for dag_node, dag_node_inspection_result in inspection_result.dag_node_to_inspection_results.items():
if MissingEmbeddings(self.example_threshold) in dag_node_inspection_result:
missing_embedding_info = dag_node_inspection_result[MissingEmbeddings(self.example_threshold)]
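Note: the dict() → {} change is the fix pylint suggests with its use-dict-literal check; the literal builds the same empty dict without a lookup of the dict builtin. A minimal sketch of the pattern, with an illustrative name:

    # Before: calls the dict builtin by name
    node_to_result = dict()
    # After: literal syntax builds the same empty dict and satisfies the linter
    node_to_result = {}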
2 changes: 1 addition & 1 deletion example_pipelines/healthcare/healthcare.py
@@ -48,4 +48,4 @@

train_data, test_data = train_test_split(data)
model = pipeline.fit(train_data, train_data['label'])
print("Mean accuracy: {}".format(model.score(test_data, test_data['label'])))
print(f"Mean accuracy: {model.score(test_data, test_data['label'])}")
71 changes: 35 additions & 36 deletions experiments/performance/_benchmark_utils.py
@@ -76,32 +76,32 @@ def get_code_for_op_benchmark(data_frame_rows, operator_type):
if operator_type == OperatorBenchmarkType.PROJECTION:
benchmark_setup = get_single_df_creation_str(data_frame_rows)
benchmark_exec = get_test_projection_str()
- benchmark_setup_func_str = "get_single_df_creation_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_single_df_creation_str({data_frame_rows})"
benchmark_exec_func_str = "get_test_projection_str()"
elif operator_type == OperatorBenchmarkType.SELECTION:
benchmark_setup = get_single_df_creation_str(data_frame_rows)
benchmark_exec = get_test_selection_str()
- benchmark_setup_func_str = "get_single_df_creation_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_single_df_creation_str({data_frame_rows})"
benchmark_exec_func_str = "get_test_selection_str()"
elif operator_type == OperatorBenchmarkType.JOIN:
benchmark_setup = get_multiple_dfs_creation_str(data_frame_rows)
benchmark_exec = get_test_join_str()
- benchmark_setup_func_str = "get_multiple_dfs_creation_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_multiple_dfs_creation_str({data_frame_rows})"
benchmark_exec_func_str = "get_test_join_str()"
elif operator_type == OperatorBenchmarkType.ONE_HOT_ENCODER:
benchmark_setup = get_np_cat_array_str(data_frame_rows)
benchmark_exec = get_test_one_hot_encoder_str()
- benchmark_setup_func_str = "get_np_cat_array_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_np_cat_array_str({data_frame_rows})"
benchmark_exec_func_str = "get_test_one_hot_encoder_str()"
elif operator_type == OperatorBenchmarkType.STANDARD_SCALER:
benchmark_setup = get_np_num_array_str(data_frame_rows)
benchmark_exec = get_test_standard_scaler_str()
- benchmark_setup_func_str = "get_np_num_array_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_np_num_array_str({data_frame_rows})"
benchmark_exec_func_str = "get_test_standard_scaler_str()"
elif operator_type == OperatorBenchmarkType.DECISION_TREE:
benchmark_setup = get_estimator_train_data_str(data_frame_rows)
benchmark_exec = get_decision_tree_str()
- benchmark_setup_func_str = "get_estimator_train_data_str({})".format(data_frame_rows)
+ benchmark_setup_func_str = f"get_estimator_train_data_str({data_frame_rows})"
benchmark_exec_func_str = "get_decision_tree_str()"
else:
assert False
@@ -230,7 +230,7 @@ def prepare_benchmark_exec(benchmark_str, setup_str, inspections):
"""
Get the setup str for timeit
"""
- setup = cleandoc("""
+ setup = cleandoc(f"""
from experiments.performance._empty_inspection import EmptyInspection
from mlinspect.instrumentation._pipeline_executor import singleton
from mlinspect.inspections import HistogramForColumns, RowLineage, MaterializeFirstOutputRows
@@ -239,57 +239,57 @@ def prepare_benchmark_exec(benchmark_str, setup_str, inspections):
get_test_one_hot_encoder_str, get_np_num_array_str, get_test_standard_scaler_str, \
get_estimator_train_data_str, get_decision_tree_str
- test_code_setup = {}
- inspector_result = singleton.run(python_code=test_code_setup, inspections={})
- test_code_benchmark = {}
- """.format(setup_str, inspections, benchmark_str))
+ test_code_setup = {setup_str}
+ inspector_result = singleton.run(python_code=test_code_setup, inspections={inspections})
+ test_code_benchmark = {benchmark_str}
+ """)
return setup
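Note on the cleandoc conversions in this file: moving the f prefix onto the triple-quoted template replaces positional .format() arguments with named interpolations; one caveat (ours, not from the commit) is that any braces meant to appear literally in the generated code must then be doubled. A minimal sketch with hypothetical stand-in values, not mlinspect's real setup strings:

    from inspect import cleandoc

    setup_snippet = "df = make_df(100)"      # hypothetical stand-ins
    inspections = "[EmptyInspection(0)]"

    setup = cleandoc(f"""
        {setup_snippet}
        result = run(python_code=test_code, inspections={inspections})
        empty = {{}}  # doubled braces emit a literal {} in the generated code
    """)
    print(setup)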


def trigger_benchmark_exec(inspections_str):
"""
Get the benchmark str for timeit
"""
- benchmark = cleandoc("""
- inspector_result_two = singleton.run(python_code=test_code_benchmark, inspections={}, reset_state=False)
- """.format(inspections_str))
+ benchmark = cleandoc(f"""
+ inspector_result_two = singleton.run(python_code=test_code_benchmark, inspections={inspections_str}, reset_state=False)
+ """)
return benchmark


def prepare_pipeline_benchmark_exec(test_code):
"""
Get the benchmark str for timeit
"""
- benchmark = cleandoc("""
+ benchmark = cleandoc(f"""
from experiments.performance._benchmark_utils import get_adult_simple_py_str, get_adult_complex_py_str, \
get_healthcare_py_str, get_compas_py_str
- code = {}
- """.format(test_code))
+ code = {test_code}
+ """)
return benchmark


def trigger_pipeline_benchmark_exec(inspections_str):
"""
Get the benchmark str for timeit
"""
- benchmark = cleandoc("""
+ benchmark = cleandoc(f"""
from experiments.performance._empty_inspection import EmptyInspection
from mlinspect import PipelineInspector
PipelineInspector\
.on_pipeline_from_string(code)\
- .add_required_inspections({}) \
+ .add_required_inspections({inspections_str}) \
.execute()
""".format(inspections_str))
""")
return benchmark


def get_single_df_creation_str(data_frame_rows):
"""
Get a complete code str that creates a DF with random value
"""
- test_code = cleandoc("""
+ test_code = cleandoc(f"""
import pandas as pd
import numpy as np
from numpy.random import randint
@@ -305,7 +305,7 @@ def get_single_df_creation_str(data_frame_rows):
group_col_3 = pd.Series(random.choices(categories, k={data_frame_rows}))
df = pd.DataFrame(zip(a, b, c, d, group_col_1, group_col_2, group_col_3), columns=['A', 'B', 'C', 'D',
'group_col_1', 'group_col_2', 'group_col_3'])
""".format(data_frame_rows=data_frame_rows))
""")
return test_code


@@ -339,7 +339,7 @@ def get_multiple_dfs_creation_str(data_frame_rows):
assert sizes_before_join - start_with_offset == data_frame_rows

# mlinspect does not support some ast nodes yet like *, /, and {}, so we need to avoid them
- test_code = cleandoc("""
+ test_code = cleandoc(f"""
import pandas as pd
import numpy as np
from numpy.random import randint, shuffle
@@ -362,8 +362,7 @@ def get_multiple_dfs_creation_str(data_frame_rows):
df_a = pd.DataFrame(zip(id_a, a, b, group_col_1, group_col_2, group_col_3), columns=['id', 'A', 'B',
'group_col_1', 'group_col_2', 'group_col_3'])
df_b = pd.DataFrame(zip(id_b, c, d), columns=['id', 'C', 'D'])
""".format(sizes_before_join=sizes_before_join, start_with_offset=start_with_offset,
end_with_offset=end_with_offset))
""")
return test_code
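Note: the comment above about unsupported AST nodes explains the shape of these templates: the arithmetic runs in the host script before the string is built, so the generated pipeline code only ever contains the resulting constants. The f-string conversion preserves this, since interpolation happens when the template is constructed, not when the generated code executes. A small illustration with hypothetical values:

    data_frame_rows = 1000                    # hypothetical
    start_with_offset = data_frame_rows // 2  # division happens here, in the host script
    # The generated snippet carries only constants, so the instrumented
    # pipeline code contains no '*' or '/' nodes:
    generated = f"id_a = list(range({start_with_offset}, {data_frame_rows}))"
    print(generated)  # id_a = list(range(500, 1000))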


@@ -381,15 +380,15 @@ def get_np_cat_array_str(data_frame_rows):
"""
Get a complete code str that creates a np array with random values
"""
- test_code = cleandoc("""
+ test_code = cleandoc(f"""
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import random
categories = ['cat_a', 'cat_b', 'cat_c']
group_col_1 = pd.Series(random.choices(categories, k={data_frame_rows}))
df = pd.DataFrame(zip(group_col_1), columns=["group_col_1"])
""".format(data_frame_rows=data_frame_rows))
""")
return test_code


@@ -408,14 +407,14 @@ def get_np_num_array_str(data_frame_rows):
"""
Get a complete code str that creates a np array with random values
"""
- test_code = cleandoc("""
+ test_code = cleandoc(f"""
from sklearn.preprocessing import StandardScaler
import pandas as pd
from numpy.random import randint
- series = randint(0,100,size=({}))
+ series = randint(0,100,size=({data_frame_rows}))
df = pd.DataFrame(series, columns=["num"])
""".format(data_frame_rows))
""")
return test_code


@@ -434,7 +433,7 @@ def get_estimator_train_data_str(data_frame_rows):
"""
Get a complete code str that creates a np array with random values
"""
- test_code = cleandoc("""
+ test_code = cleandoc(f"""
from sklearn.preprocessing import StandardScaler
import pandas as pd
from numpy.random import randint
@@ -447,7 +446,7 @@ def get_estimator_train_data_str(data_frame_rows):
data_df = pd.DataFrame(data)
target_df = pd.DataFrame(target)
""".format(data_frame_rows=data_frame_rows))
""")
return test_code


@@ -466,7 +465,7 @@ def get_adult_simple_py_str():
"""
Get the code str for the adult_easy pipeline
"""
- with open(ADULT_SIMPLE_PY) as file:
+ with open(ADULT_SIMPLE_PY, encoding="utf-8") as file:
test_code = file.read()
return test_code

@@ -475,7 +474,7 @@ def get_adult_complex_py_str():
"""
Get the code str for the adult_easy pipeline
"""
- with open(ADULT_COMPLEX_PY) as file:
+ with open(ADULT_COMPLEX_PY, encoding="utf-8") as file:
test_code = file.read()
return test_code

@@ -484,7 +483,7 @@ def get_compas_py_str():
"""
Get the code str for the adult_easy pipeline
"""
- with open(COMPAS_PY) as file:
+ with open(COMPAS_PY, encoding="utf-8") as file:
test_code = file.read()
return test_code

@@ -493,6 +492,6 @@ def get_healthcare_py_str():
"""
Get the code str for the adult_easy pipeline
"""
- with open(HEALTHCARE_PY) as file:
+ with open(HEALTHCARE_PY, encoding="utf-8") as file:
test_code = file.read()
return test_code
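Note: the open() changes above add an explicit encoding, the fix for pylint's unspecified-encoding warning; without the argument, Python falls back to the platform's preferred locale encoding, so file reads can differ between machines. A minimal sketch with a hypothetical path:

    PIPELINE_PY = "example_pipelines/healthcare/healthcare.py"  # hypothetical path

    # Deterministic across platforms; omitting encoding= uses the locale default.
    with open(PIPELINE_PY, encoding="utf-8") as file:
        test_code = file.read()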
7 changes: 5 additions & 2 deletions mlinspect/backends/_backend.py
@@ -30,15 +30,18 @@ class Backend(metaclass=abc.ABCMeta):
The Interface for the different instrumentation backends
"""

+ @staticmethod
@abc.abstractmethod
- def before_call(self, operator_context, input_infos: List[AnnotatedDfObject]) \
+ def before_call(operator_context, input_infos: List[AnnotatedDfObject]) \
-> List[AnnotatedDfObject]:
"""The value or module a function may be called on"""
# pylint: disable=too-many-arguments, unused-argument
raise NotImplementedError


+ @staticmethod
@abc.abstractmethod
- def after_call(self, operator_context, input_infos: List[AnnotatedDfObject], return_value,
+ def after_call(operator_context, input_infos: List[AnnotatedDfObject], return_value,
non_data_function_args: Dict[str, any] = MappingProxyType({})) -> BackendResult:
"""The return value of some function"""
# pylint: disable=too-many-arguments, unused-argument
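Note: these changes turn the abstract hooks into static methods, dropping the self parameter that the old signatures mixed with @abc.abstractmethod. The decorator order shown is the required one: abstractmethod must be the innermost decorator. A minimal self-contained sketch, independent of mlinspect's real backends:

    import abc

    class Backend(abc.ABC):
        @staticmethod
        @abc.abstractmethod  # abstractmethod must be innermost
        def before_call(operator_context, input_infos):
            """The value or module a function may be called on"""
            raise NotImplementedError

    class DummyBackend(Backend):
        @staticmethod
        def before_call(operator_context, input_infos):
            return input_infos  # concrete override, also without self

    print(DummyBackend.before_call(None, []))  # -> []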
5 changes: 3 additions & 2 deletions mlinspect/backends/_backend_utils.py
@@ -1,6 +1,7 @@
"""
Some utility functions the different instrumentation backends
"""
+ # pylint: disable=unnecessary-dunder-call
import itertools

import numpy
@@ -62,7 +63,7 @@ def get_iterator_for_type(data, np_nditer_with_refs=False, columns=None):
elif isinstance(data, list):
iterator = get_list_row_iterator(data, columns)
else:
raise NotImplementedError("TODO: Support type {}!".format(type(data)))
raise NotImplementedError(f"TODO: Support type {type(data)}!")
return iterator


@@ -89,7 +90,7 @@ def create_wrapper_with_annotations(annotations_df, return_value) -> AnnotatedDf
elif return_value is None:
new_return_value = AnnotatedDfObject(None, annotations_df)
else:
raise NotImplementedError("A type that is still unsupported was found: {}".format(return_value))
raise NotImplementedError(f"A type that is still unsupported was found: {return_value}")
return new_return_value


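Note: rather than rewriting the dunder calls in this module, the commit silences the rule file-wide with the pragma added at the top. A sketch of what such a pragma covers, with a hypothetical class:

    # pylint: disable=unnecessary-dunder-call
    class Annotations:
        def __init__(self, data):
            self.data = data

        def equals(self, other):
            # Explicit dunder call the rule would normally flag;
            # the idiomatic spelling is `self.data == other.data`.
            return self.data.__eq__(other.data)

    print(Annotations(1).equals(Annotations(1)))  # -> True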
4 changes: 2 additions & 2 deletions mlinspect/backends/_pandas_backend.py
@@ -77,8 +77,8 @@ def after_call(operator_context, input_infos: List[AnnotatedDfObject], return_va
input_infos[1].result_data.drop("mlinspect_index_y", axis=1, inplace=True)

else:
raise NotImplementedError("PandasBackend doesn't know any operations of type '{}' yet!"
.format(operator_context.operator))
raise NotImplementedError(f"PandasBackend doesn't know any operations of type "
f"'{operator_context.operator}' yet!")

return return_value

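Note: when a long message is split across lines with implicit string concatenation, every fragment that interpolates needs its own f prefix — without the second one here, '{operator_context.operator}' would appear literally in the error text. A minimal sketch with an illustrative value:

    operator = "join"  # illustrative
    message = (f"PandasBackend doesn't know any operations of type "
               f"'{operator}' yet!")
    print(message)  # ... operations of type 'join' yet!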
2 changes: 1 addition & 1 deletion mlinspect/checks/_check.py
@@ -63,4 +63,4 @@ def __hash__(self):

def __repr__(self):
"""Checks must have a str representation"""
return "{}({})".format(self.__class__.__name__, self.check_id or "")
return f"{self.__class__.__name__}({self.check_id or ''})"
9 changes: 4 additions & 5 deletions mlinspect/checks/_no_bias_introduced_for.py
@@ -78,10 +78,9 @@ def evaluate(self, inspection_result: InspectionResult) -> CheckResult:
column_result = self.get_histograms_for_node_and_column(column, histograms, node, parents)
column_results[column] = column_result
if not column_result.acceptable_change:
issue = "A {} causes a min_relative_ratio_change of '{}' by {}, a value below the " \
"configured minimum threshold {}!" \
.format(node.operator_info.operator.value, column, column_result.min_relative_ratio_change,
self.min_allowed_relative_ratio_change)
issue = f"A {node.operator_info.operator.value} causes a min_relative_ratio_change of '{column}' " \
f"by {column_result.min_relative_ratio_change}, a value below the " \
f"configured minimum threshold {self.min_allowed_relative_ratio_change}!"
issue_list.append(issue)
check_status = CheckStatus.FAILURE

@@ -185,7 +184,7 @@ def get_distribution_changes_overview_as_df(no_bias_check_result: NoBiasIntroduc
descriptions = []
assert isinstance(no_bias_check_result.check, NoBiasIntroducedFor)
sensitive_column_names = no_bias_check_result.check.sensitive_columns
sensitive_column_names = ["'{}' distribution change below the configured minimum test threshold".format(name)
sensitive_column_names = [f"'{name}' distribution change below the configured minimum test threshold"
for name in sensitive_column_names]
sensitive_columns = []
for _ in range(len(sensitive_column_names)):
2 changes: 1 addition & 1 deletion mlinspect/checks/_no_illegal_features.py
@@ -54,7 +54,7 @@ def evaluate(self, inspection_result: InspectionResult) -> CheckResult:
forbidden_columns = {*ILLEGAL_FEATURES, *self.additional_illegal_feature_names}
used_illegal_columns = list(set(used_columns).intersection(forbidden_columns))
if used_illegal_columns:
description = "Used illegal columns: {}".format(used_illegal_columns)
description = f"Used illegal columns: {used_illegal_columns}"
result = NoIllegalFeaturesResult(self, CheckStatus.FAILURE, description, used_illegal_columns)
else:
result = NoIllegalFeaturesResult(self, CheckStatus.SUCCESS, None, [])
11 changes: 5 additions & 6 deletions mlinspect/checks/_similar_removal_probabilities_for.py
@@ -78,10 +78,9 @@ def evaluate(self, inspection_result: InspectionResult) -> CheckResult:
column_result = self.get_histograms_for_node_and_column(column, histograms, node, parents)
column_results[column] = column_result
if not column_result.acceptable_probability_difference:
issue = "A {} causes a max_probability_difference of '{}' by {}, a value above the " \
"configured maximum threshold {}!" \
.format(node.operator_info.operator.value, column, column_result.max_probability_difference,
self.max_allowed_probability_difference)
issue = f"A {node.operator_info.operator.value} causes a max_probability_difference of " \
f"'{column}' by {column_result.max_probability_difference}, a value above the " \
f"configured maximum threshold {self.max_allowed_probability_difference}!"
issue_list.append(issue)
check_status = CheckStatus.FAILURE

@@ -224,8 +223,8 @@ def get_removal_probabilities_overview_as_df(removal_probab_check_result: Simila
assert isinstance(removal_probab_check_result.check, SimilarRemovalProbabilitiesFor)
sensitive_column_names = []
for name in removal_probab_check_result.check.sensitive_columns:
- removal_probability_column_name = "'{}' probability difference below the configured maximum test " \
-                                   "threshold".format(name)
+ removal_probability_column_name = f"'{name}' probability difference below the configured maximum test " \
+                                   "threshold"
sensitive_column_names.append(removal_probability_column_name)

sensitive_columns = []
2 changes: 1 addition & 1 deletion mlinspect/inspections/_inspection.py
@@ -41,4 +41,4 @@ def __hash__(self):

def __repr__(self):
"""Inspections must have a str representation"""
return "{}({})".format(self.__class__.__name__, self.inspection_id)
return f"{self.__class__.__name__}({self.inspection_id})"