Skip to content

Commit 6ccc564

Browse files
authored
Model instance-attribute flow as a Ruby/JS-style field level step (any method)
1 parent ef37de9 commit 6ccc564

3 files changed

Lines changed: 47 additions & 36 deletions

File tree

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
172172
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
173173
predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) {
174174
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
175+
or
176+
localFieldStep(nodeFrom, nodeTo)
175177
}
176178

177179
/**
@@ -318,36 +320,47 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
318320
}
319321

320322
/**
321-
* Holds if `nodeFrom` is the `self` parameter of an `__init__` method and `nodeTo` is
322-
* the `self` parameter of another method on the same class.
323+
* Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first
324+
* parameter of an instance method of `cls` (i.e. an access of the form `self.attr`).
323325
*
324-
* This models flow through instance attributes (`self.foo`): a value stored into
325-
* `self.foo` inside `__init__` can be read from `self.foo` in another method.
326-
* Type-tracking handles the store and read steps via `AttrWrite`/`AttrRead`, but on its
327-
* own it cannot relate the `self` parameter of `__init__` (where the store happens) to
328-
* the `self` parameter of the reading method.
326+
* Static methods and class methods are excluded, since their first parameter is not a
327+
* `self` instance reference.
328+
*/
329+
private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) {
330+
exists(Function method, Name selfUse |
331+
method = cls.getAMethod() and
332+
not DataFlowDispatch::isStaticmethod(method) and
333+
not DataFlowDispatch::isClassmethod(method) and
334+
selfUse.getVariable() = method.getArg(0).(Name).getVariable() and
335+
ref.getObject().asCfgNode().getNode() = selfUse and
336+
ref.mayHaveAttributeName(attr)
337+
)
338+
}
339+
340+
/**
341+
* Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a
342+
* class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance
343+
* method of the same class.
329344
*
330-
* We restrict the source to `self` in `__init__` (rather than allowing flow between the
331-
* `self` parameters of arbitrary method pairs) because `__init__` is guaranteed to run
332-
* before any other method of the instance is used. This keeps the over-approximation
333-
* directional -- attributes assigned during construction flow to later method bodies,
334-
* but we do not pretend that an attribute assigned in one ordinary method is visible in
335-
* another (where there is no such ordering guarantee).
345+
* This models flow through instance attributes (`self.foo`): a value stored into
346+
* `self.foo` in one method can be read from `self.foo` in another method. Type-tracking
347+
* handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot
348+
* relate the `self` of the writing method to the `self` of the reading method. Following
349+
* the approach used for Ruby and JavaScript, we model this directly as a level step from
350+
* the written value to the read reference, for any pair of methods on the class (not
351+
* just from `__init__`).
336352
*
337-
* This is still instance-insensitive (it does not distinguish between different
338-
* instances of the same class), matching the precision of instance-attribute handling
339-
* elsewhere.
353+
* This is an over-approximation: it is instance-insensitive (it does not distinguish
354+
* between different instances of the same class) and order-insensitive (it does not
355+
* require the write to happen before the read), matching the precision of
356+
* instance-attribute handling for Ruby and JavaScript.
340357
*/
341-
private predicate initSelfJumpStep(Node nodeFrom, LocalSourceNode nodeTo) {
342-
exists(Class cls, Function initMethod, Function otherMethod |
343-
initMethod = cls.getAMethod() and
344-
initMethod.getName() = "__init__" and
345-
otherMethod = cls.getAMethod() and
346-
otherMethod != initMethod and
347-
not DataFlowDispatch::isStaticmethod(otherMethod) and
348-
not DataFlowDispatch::isClassmethod(otherMethod) and
349-
nodeFrom.asExpr() = initMethod.getArg(0) and
350-
nodeTo.asExpr() = otherMethod.getArg(0)
358+
private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) {
359+
exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read |
360+
selfAttrRef(cls, attr, write) and
361+
nodeFrom = write.getValue() and
362+
selfAttrRef(cls, attr, read) and
363+
nodeTo = read
351364
)
352365
}
353366

@@ -358,8 +371,6 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
358371
DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
359372
or
360373
capturedJumpStep(nodeFrom, nodeTo)
361-
or
362-
initSelfJumpStep(nodeFrom, nodeTo)
363374
}
364375

365376
predicate hasFeatureBacktrackStoreTarget() { any() }

python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,11 @@ class MyClass2(object):
150150
def __init__(self): # $ tracked=foo
151151
self.foo = tracked # $ tracked=foo tracked
152152

153-
def print_foo(self): # $ tracked=foo
154-
print(self.foo) # $ tracked=foo tracked
153+
def print_foo(self): # $ MISSING: tracked=foo
154+
print(self.foo) # $ tracked MISSING: tracked=foo
155155

156-
def possibly_uncalled_method(self): # $ tracked=foo
157-
print(self.foo) # $ tracked=foo tracked
156+
def possibly_uncalled_method(self): # $ MISSING: tracked=foo
157+
print(self.foo) # $ tracked MISSING: tracked=foo
158158

159159
instance = MyClass2()
160160
print(instance.foo) # $ MISSING: tracked=foo tracked

python/ql/test/library-tests/frameworks/hdbcli/pep249.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111

1212
# Connection stored in a class attribute (`self._conn`) and used in another method.
1313
#
14-
# This is detected because type tracking includes a jump step from the `self` parameter of
15-
# `__init__` to the `self` parameter of the other methods on the class, which lets the
16-
# connection stored into `self._conn` during construction be read back from `self._conn`
17-
# (directly or via a getter) in the other methods.
14+
# This is detected because type tracking includes a level step modelling flow through
15+
# instance attributes: a value written to `self._conn` in one method (here `__init__`) can
16+
# be read back from `self._conn` (directly or via a getter) in any other method on the same
17+
# class. This follows the same approach used for instance fields in Ruby and JavaScript.
1818
class Database:
1919
def __init__(self):
2020
self._conn = dbapi.connect(address="hostname", port=300, user="username")

0 commit comments

Comments
 (0)