diff --git a/python/ql/integration-tests/query-suite/python-code-quality.qls.expected b/python/ql/integration-tests/query-suite/python-code-quality.qls.expected index c2168cab937b..b93829875d20 100644 --- a/python/ql/integration-tests/query-suite/python-code-quality.qls.expected +++ b/python/ql/integration-tests/query-suite/python-code-quality.qls.expected @@ -1,3 +1,4 @@ +ql/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql ql/python/ql/src/Functions/IterReturnsNonSelf.ql ql/python/ql/src/Functions/NonCls.ql ql/python/ql/src/Functions/NonSelf.ql diff --git a/python/ql/integration-tests/query-suite/python-security-and-quality.qls.expected b/python/ql/integration-tests/query-suite/python-security-and-quality.qls.expected index e391dea95cd7..170d9f442f92 100644 --- a/python/ql/integration-tests/query-suite/python-security-and-quality.qls.expected +++ b/python/ql/integration-tests/query-suite/python-security-and-quality.qls.expected @@ -4,7 +4,7 @@ ql/python/ql/src/Classes/EqualsOrHash.ql ql/python/ql/src/Classes/EqualsOrNotEquals.ql ql/python/ql/src/Classes/IncompleteOrdering.ql ql/python/ql/src/Classes/InconsistentMRO.ql -ql/python/ql/src/Classes/InitCallsSubclassMethod.ql +ql/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql ql/python/ql/src/Classes/MissingCallToDel.ql ql/python/ql/src/Classes/MissingCallToInit.ql ql/python/ql/src/Classes/MutatingDescriptor.ql diff --git a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.qhelp b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.qhelp new file mode 100644 index 000000000000..be8ba98c613c --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.qhelp @@ -0,0 +1,48 @@ + + + +

+When initializing an instance of the class in the class' __init__ method, calls tha are made using the instance may receive an instance of the class that is not +yet fully initialized. When a method called in an initializer is overridden in a subclass, the subclass method receives the instance +in a potentially unexpected state. Fields that would be initialized after the call, including potentially in the subclass' __init__ method, +will not be initialized. This may lead to runtime errors, as well as make the code more difficult to maintain, as future changes may not +be aware of which fields would not be initialized. +

+ +
+ + +

If possible, refactor the initializer method such that initialization is complete before calling any overridden methods. +For helper methods used as part of initialization, avoid overriding them, and instead call any additional logic required +in the subclass' __init__ method. +

+

+If the overridden method does not depend on the instance self, and only on its class, consider making it a @classmethod or @staticmethod instead. +

+

+If calling an overridden method is absolutely required, consider marking it as an internal method (by using an _ prefix) to +discourage external users of the library from overriding it and observing partially initialized state, and ensure that the fact it is called during initialization +is mentioned in the documentation. +

+ +
+ +

In the following case, the __init__ method of Super calls the set_up method that is overridden by Sub. +This results in Sub.set_up being called with a partially initialized instance of Super which may be unexpected.

+ +

In the following case, the initialization methods are separate between the superclass and the subclass.

+ +
+ + + +
  • CERT Secure Coding: +Rule MET05-J. Reference discusses Java but is applicable to object oriented programming in many languages.
  • +
  • StackOverflow: Overridable method calls in constructors.
  • +
  • Python documentation: @classmethod.
  • + + +
    +
    diff --git a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql new file mode 100644 index 000000000000..32eb5ffe79e5 --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql @@ -0,0 +1,58 @@ +/** + * @name `__init__` method calls overridden method + * @description Calling a method from `__init__` that is overridden by a subclass may result in a partially + * initialized instance being observed. + * @kind problem + * @tags reliability + * correctness + * quality + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/init-calls-subclass + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.internal.DataFlowDispatch + +predicate initSelfCallOverridden( + Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target, + Function override +) { + init.isInitMethod() and + call.getScope() = init and + exists(Class superclass, Class subclass, DataFlow::ParameterNode selfArg | + superclass = init.getScope() and + subclass = override.getScope() and + subclass = getADirectSubclass+(superclass) and + selfArg.getParameter() = init.getArg(0) and + DataFlow::localFlow(selfArg, self) and + call.calls(self, override.getName()) and + target = superclass.getAMethod() and + target.getName() = override.getName() + ) +} + +predicate readsFromSelf(Function method) { + exists(DataFlow::ParameterNode self, DataFlow::Node sink | + self.getParameter() = method.getArg(0) and + DataFlow::localFlow(self, sink) + | + sink instanceof DataFlow::ArgumentNode + or + sink = any(DataFlow::AttrRead a).getObject() + ) +} + +from + Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target, + Function override +where + initSelfCallOverridden(init, self, call, target, override) and + readsFromSelf(override) and + not isClassmethod(override) and + not isStaticmethod(override) and + not target.getName().matches("\\_%") +select call, "This call to $@ in an initialization method is overridden by $@.", target, + target.getQualifiedName(), override, override.getQualifiedName() diff --git a/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodBad.py b/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodBad.py new file mode 100644 index 000000000000..4bb5466267dc --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodBad.py @@ -0,0 +1,23 @@ +class Super(object): + + def __init__(self, arg): + self._state = "Not OK" + self.set_up(arg) # BAD: This method is overridden, so `Sub.set_up` receives a partially initialized instance. + self._state = "OK" + + def set_up(self, arg): + "Do some setup" + self.a = 2 + +class Sub(Super): + + def __init__(self, arg): + super().__init__(arg) + self.important_state = "OK" + + def set_up(self, arg): + super().set_up(arg) + "Do some more setup" + # BAD: at this point `self._state` is set to `"Not OK"`, and `self.important_state` is not initialized. + if self._state == "OK": + self.b = self.a + 2 diff --git a/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodGood.py b/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodGood.py new file mode 100644 index 000000000000..ebf501f06642 --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclass/examples/InitCallsSubclassMethodGood.py @@ -0,0 +1,24 @@ +class Super(object): + + def __init__(self, arg): + self._state = "Not OK" + self.super_set_up(arg) # GOOD: This isn't overriden. Instead, additional setup the subclass needs is called by the subclass' `__init__ method.` + self._state = "OK" + + def super_set_up(self, arg): + "Do some setup" + self.a = 2 + + +class Sub(Super): + + def __init__(self, arg): + super().__init__(arg) + self.sub_set_up(self, arg) + self.important_state = "OK" + + + def sub_set_up(self, arg): + "Do some more setup" + if self._state == "OK": + self.b = self.a + 2 \ No newline at end of file diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.py b/python/ql/src/Classes/InitCallsSubclassMethod.py deleted file mode 100644 index 6e0dedb0142d..000000000000 --- a/python/ql/src/Classes/InitCallsSubclassMethod.py +++ /dev/null @@ -1,48 +0,0 @@ -#Superclass __init__ calls subclass method - -class Super(object): - - def __init__(self, arg): - self._state = "Not OK" - self.set_up(arg) - self._state = "OK" - - def set_up(self, arg): - "Do some set up" - -class Sub(Super): - - def __init__(self, arg): - Super.__init__(self, arg) - self.important_state = "OK" - - def set_up(self, arg): - Super.set_up(self, arg) - "Do some more set up" # Dangerous as self._state is "Not OK" - - -#Improved version with inheritance: - -class Super(object): - - def __init__(self, arg): - self._state = "Not OK" - self.super_set_up(arg) - self._state = "OK" - - def super_set_up(self, arg): - "Do some set up" - - -class Sub(Super): - - def __init__(self, arg): - Super.__init__(self, arg) - self.sub_set_up(self, arg) - self.important_state = "OK" - - - def sub_set_up(self, arg): - "Do some more set up" - - diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.qhelp b/python/ql/src/Classes/InitCallsSubclassMethod.qhelp deleted file mode 100644 index 72904a0bd296..000000000000 --- a/python/ql/src/Classes/InitCallsSubclassMethod.qhelp +++ /dev/null @@ -1,42 +0,0 @@ - - - -

    -When an instance of a class is initialized, the super-class state should be -fully initialized before it becomes visible to the subclass. -Calling methods of the subclass in the superclass' __init__ -method violates this important invariant. -

    - -
    - - -

    Do not use methods that are subclassed in the construction of an object. -For simpler cases move the initialization into the superclass' __init__ method, -preventing it being overridden. Additional initialization of subclass should -be done in the __init__ method of the subclass. -For more complex cases, it is advisable to use a static method or function to manage -object creation. -

    - -

    Alternatively, avoid inheritance altogether using composition instead.

    - -
    - - - - - - - -
  • CERT Secure Coding: -Rule MET05-J. Although this is a Java rule it applies to most object-oriented languages.
  • -
  • Python Standard Library: Static methods.
  • -
  • Wikipedia: Composition over inheritance.
  • - - - -
    -
    diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.ql b/python/ql/src/Classes/InitCallsSubclassMethod.ql deleted file mode 100644 index 2fb6e90dd7d5..000000000000 --- a/python/ql/src/Classes/InitCallsSubclassMethod.ql +++ /dev/null @@ -1,30 +0,0 @@ -/** - * @name `__init__` method calls overridden method - * @description Calling a method from `__init__` that is overridden by a subclass may result in a partially - * initialized instance being observed. - * @kind problem - * @tags reliability - * correctness - * @problem.severity warning - * @sub-severity low - * @precision high - * @id py/init-calls-subclass - */ - -import python - -from - ClassObject supercls, string method, Call call, FunctionObject overriding, - FunctionObject overridden -where - exists(FunctionObject init, SelfAttribute sa | - supercls.declaredAttribute("__init__") = init and - call.getScope() = init.getFunction() and - call.getFunc() = sa - | - sa.getName() = method and - overridden = supercls.declaredAttribute(method) and - overriding.overrides(overridden) - ) -select call, "Call to self.$@ in __init__ method, which is overridden by $@.", overridden, method, - overriding, overriding.descriptiveString() diff --git a/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.expected b/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.expected index d3cde45c1ff2..f06282e133b6 100644 --- a/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.expected +++ b/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.expected @@ -1 +1,2 @@ -| init_calls_subclass.py:7:9:7:24 | Attribute() | Call to self.$@ in __init__ method, which is overridden by $@. | init_calls_subclass.py:10:5:10:26 | Function set_up | set_up | init_calls_subclass.py:19:5:19:26 | Function set_up | method Sub.set_up | +| init_calls_subclass.py:8:13:8:28 | ControlFlowNode for Attribute() | This call to $@ in an initialization method is overridden by $@. | init_calls_subclass.py:11:9:11:30 | Function set_up | bad1.Super.set_up | init_calls_subclass.py:20:9:20:30 | Function set_up | bad1.Sub.set_up | +| init_calls_subclass.py:32:13:32:27 | ControlFlowNode for Attribute() | This call to $@ in an initialization method is overridden by $@. | init_calls_subclass.py:34:9:34:27 | Function postproc | bad2.Super.postproc | init_calls_subclass.py:43:9:43:27 | Function postproc | bad2.Sub.postproc | diff --git a/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.qlref b/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.qlref index f820a30b11a2..6530409f90ac 100644 --- a/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.qlref +++ b/python/ql/test/query-tests/Classes/init-calls-subclass-method/InitCallsSubclassMethod.qlref @@ -1 +1 @@ -Classes/InitCallsSubclassMethod.ql +Classes/InitCallsSubclass/InitCallsSubclassMethod.ql diff --git a/python/ql/test/query-tests/Classes/init-calls-subclass-method/init_calls_subclass.py b/python/ql/test/query-tests/Classes/init-calls-subclass-method/init_calls_subclass.py index 3248f8d83034..ef944a9c7ef5 100644 --- a/python/ql/test/query-tests/Classes/init-calls-subclass-method/init_calls_subclass.py +++ b/python/ql/test/query-tests/Classes/init-calls-subclass-method/init_calls_subclass.py @@ -1,22 +1,75 @@ #Superclass __init__ calls subclass method -class Super(object): +def bad1(): + class Super: - def __init__(self, arg): - self._state = "Not OK" - self.set_up(arg) - self._state = "OK" + def __init__(self, arg): + self._state = "Not OK" + self.set_up(arg) # BAD: set_up is overriden. + self._state = "OK" - def set_up(self, arg): - "Do some set up" + def set_up(self, arg): + "Do some set up" -class Sub(Super): + class Sub(Super): - def __init__(self, arg): - Super.__init__(self, arg) - self.important_state = "OK" + def __init__(self, arg): + super().__init__(arg) + self.important_state = "OK" - def set_up(self, arg): - Super.set_up(self, arg) - "Do some more set up" # Dangerous as self._state is "Not OK" and - # self.important_state is uninitialized \ No newline at end of file + def set_up(self, arg): + super().set_up(arg) + "Do some more set up" # `self` is partially initialized + if self.important_state == "OK": + pass + +def bad2(): + class Super: + def __init__(self, arg): + self.a = arg + # BAD: postproc is called after initialization. This is still an issue + # since it may still occur before all initialization on a subclass is complete. + self.postproc() + + def postproc(self): + if self.a == 1: + pass + + class Sub(Super): + def __init__(self, arg): + super().__init__(arg) + self.b = 3 + + def postproc(self): + if self.a == 2 and self.b == 3: + pass + +def good3(): + class Super: + def __init__(self, arg): + self.a = arg + self.set_b() # OK: Here `set_b` is used for initialization, but does not read the partially initialized state of `self`. + self.c = 1 + + def set_b(self): + self.b = 3 + + class Sub(Super): + def set_b(self): + self.b = 4 + +def good4(): + class Super: + def __init__(self, arg): + self.a = arg + # OK: Here `_set_b` is likely an internal method (as indicated by the _ prefix). + # We assume thus that regular consumers of the library will not override it, and classes that do are internal and account for `self`'s partially initialized state. + self._set_b() + self.c = 1 + + def _set_b(self): + self.b = 3 + + class Sub(Super): + def _set_b(self): + self.b = self.a+1 \ No newline at end of file